import pandas as pd
파이썬을 배웁니다.
그리고 연습장으로 삼아 꾸준히 공부하기 위해서, 불편하지만 브런치에 글을 써보려고 합니다.
연습을 위한 데이터는 Kaggle의 Predict FIFA 2018 Man of the Match입니다.
import pandas as pd
# Load the match statistics CSV into a DataFrame (path is relative to the working directory).
data = pd.read_csv("FIFA_2018_Statistics.csv")
# Preview the first rows (head() shows 5 rows by default).
data.head()
# List every column label in the DataFrame.
data.columns
Index(['Date', 'Team', 'Opponent', 'Goal Scored', 'Ball Possession %', 'Attempts', 'On-Target', 'Off-Target', 'Blocked', 'Corners', 'Offsides', 'Free Kicks', 'Saves', 'Pass Accuracy %', 'Passes', 'Distance Covered (Kms)', 'Fouls Committed', 'Yellow Card', 'Yellow & Red', 'Red', 'Man of the Match', '1st Goal', 'Round', 'PSO', 'Goals in PSO', 'Own goals', 'Own goal Time'], dtype='object')
# Summary statistics for the DataFrame's columns.
data.describe()
# Select a single column by label (returns a Series) and preview its first rows.
data['Team'].head()
0 Russia
1 Saudi Arabia
2 Egypt
3 Uruguay
4 Morocco
Name: Team, dtype: object
# Passing a list of labels selects several columns (returns a DataFrame); tail() shows the last rows.
data[['Team','Goal Scored']].tail()
Team Goal Scored
123 England 1
124 Belgium 2
125 England 0
126 France 4
127 Croatia 2
# .loc selects by index label: the row labelled 127, returned as a Series.
data.loc[127]
Date 2018-07-15 00:00:00
Team Croatia
Opponent France
Goal Scored 2
Ball Possession % 61
Attempts 15
On-Target 3
Off-Target 8
Blocked 4
Corners 6
Offsides 1
Free Kicks 15
Saves 3
Pass Accuracy % 83
Passes 547
Distance Covered (Kms) 100
Fouls Committed 13
Yellow Card 1
Yellow & Red 0
Red 0
Man of the Match No
1st Goal 28
Round Final
PSO No
Goals in PSO 0
Own goals NaN
Own goal Time NaN
Host Country Russia
Ball Control Good
Name: 127, dtype: object
# A list of labels selects several rows at once (here the rows labelled 0 and 127).
data.loc[[0,127]]
# A second list restricts the result to the named columns.
data.loc[[0,127],['Team',"Goal Scored"]]
Team Goal Scored
0 Russia 5
127 Croatia 2
# Boolean mask as the row selector: keep rows whose Team is "France", showing only two columns.
data.loc[data["Team"] == "France",["Goal Scored","Round"]]
# Build data2 as a copy of data without the columns listed below; data itself is left unchanged.
dropped_columns = [
    'Fouls Committed',
    'Yellow & Red',
    'Man of the Match',
    '1st Goal',
    'PSO',
    'Goals in PSO',
    'Own goals',
    'Own goal Time',
]
data2 = data.drop(columns=dropped_columns)
# Equivalent ways to keep only the rows for France:
# 1) boolean mask built with bracket column access
data2[data2["Team"] == "France"]
# 2) the same mask built with attribute column access
data2[data2.Team == 'France']
# 3) isin() with a one-element list
data2[data2['Team'].isin(['France'])]
# isin() extends naturally to several values: rows for either team are kept.
data2[data2['Team'].isin(["France","Korea Republic"])]
위의 내용을 풀어서 쓰면 다음과 같습니다.
# Same filter as the isin() one-liner, split into named steps.
# Team names to keep.
FR_KR = ["France","Korea Republic"]
# Boolean Series: True where the row's Team is one of the names above.
Team_FR_KR = data2["Team"].isin(FR_KR)
# Index with the mask to keep only the matching rows.
data2[Team_FR_KR]
# Boolean Series: True where the Team string contains "Korea".
data_KR = data['Team'].str.contains("Korea")
data[data_KR]
# Combine two conditions with & (element-wise AND); each comparison needs its own
# parentheses because & binds more tightly than > and ==.
data[(data['Goal Scored'] > 3) & (data['Yellow Card'] == 0) ]
위의 내용을 풀어서 쓰면 다음과 같습니다.
# Same combined filter, with each condition stored as its own boolean Series.
# True where more than 3 goals were scored.
Goal_3over = data['Goal Scored'] > 3
# True where no yellow card was received.
YC_0 = data['Yellow Card'] == 0
# Rows satisfying both conditions.
data[Goal_3over & YC_0]
# Convert the Date column to datetime values so the .dt accessor becomes available.
# NOTE(review): if the CSV stores dates day-first (e.g. DD-MM-YYYY), pass dayfirst=True
# or an explicit format= to avoid ambiguous parsing — confirm against the file.
data2['Date'] = pd.to_datetime(data2['Date'])
# Extract the year component of each date and preview the first rows.
data2['Date'].dt.year.head()
0 2018
1 2018
2 2018
3 2018
4 2018
Name: Date, dtype: int64
# Pivot tables. When aggfunc is not given, pivot_table aggregates with the mean.

# Average goals scored per round.
pd.pivot_table(data2, index='Round', values='Goal Scored')
# Average goals and cards per (team, round) pair.
pd.pivot_table(data2, index=['Team','Round'], values=['Goal Scored','Yellow Card','Red'])
# Totals per round. The aggregation is named by string ("sum") rather than np.sum:
# numpy is never imported in this script, so np.sum would raise NameError, and
# recent pandas versions also recommend the string form.
pd.pivot_table(data, index='Round', values=['Goal Scored','Yellow Card','Red'], aggfunc="sum")
# Assigning a scalar to a new column label creates the column with that value in every row.
data['Host Country'] = "Russia"
data['Host Country'].head()
0 Russia
1 Russia
2 Russia
3 Russia
4 Russia
Name: Host Country, dtype: object
# Label each row by its possession percentage in a new "Ball Control" column:
#   > 70 -> "Perfect", (50, 70] -> "Good", (30, 50] -> "Bad", <= 30 -> "Worst".
possession = data2['Ball Possession %']
control_bands = {
    "Perfect": possession > 70,
    "Good": (possession <= 70) & (possession > 50),
    "Bad": (possession <= 50) & (possession > 30),
    "Worst": possession <= 30,
}
# Assign band by band; rows matching a mask receive that band's label.
for band_label, in_band in control_bands.items():
    data2.loc[in_band, "Ball Control"] = band_label
# Show the source value next to its label for the first 12 rows.
data2[['Ball Possession %','Ball Control']].head(12)