Seaborn¶

Matplotlib을 기반으로 다양한 색상 테마와 통계용 차트 등의 기능을 추가한 시각화 패키지
https://seaborn.github.io/

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# seaborn 임포트
import seaborn as sns

# Register a Hangul-capable font so Korean titles and labels render in plots.
# NOTE: this path is Windows-specific (malgunbd.ttf, presumably Malgun Gothic
# Bold); point it at another Korean font file on other platforms.
import matplotlib.font_manager as fm
# Raw string: in a normal literal '\W', '\F' and '\m' are invalid escape
# sequences, which Python reports as a warning (and will eventually reject).
path = r'C:\Windows\Fonts\malgunbd.ttf'
# Look up the font family name stored in the file and make it the default.
font_name = fm.FontProperties(fname=path).get_name()
plt.rc('font', family=font_name)

# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

샘플 데이타프레임 불러오기¶

sns.load_dataset(샘플데이타셋이름)

tips / titanic / iris / flights

# Load the four seaborn example datasets used in the rest of the notebook.
tips, titanic, iris, flights = (
    sns.load_dataset(name) for name in ('tips', 'titanic', 'iris', 'flights')
)

# Dimensions of the tips DataFrame as (rows, columns)
tips.shape

(244, 7)

# Dimensions of the iris DataFrame as (rows, columns)
iris.shape

(150, 5)

# Preview the first three rows of the titanic DataFrame
titanic.head(3)

# Preview the first rows of the flights DataFrame
flights.head()

# Dimensions of the flights DataFrame as (rows, columns)
flights.shape

(144, 3)

Line Plot¶

fig, 변수 = plt.subplots()
변수 = sns.lineplot(x='컬럼명1', y='컬럼명2', data=데이타프레임명)
변수.set_title(제목) 
변수.set_xlabel(X축 제목)
변수.set_ylabel(Y축 제목)

# Preview the first rows of the tips DataFrame
tips.head()

# Line plot of total_bill against tip, drawn on an explicitly created Axes.
# plt.subplots() returns a (Figure, Axes) pair, so unpack it instead of
# binding the whole tuple to `ax`.
fig, ax = plt.subplots()
sns.lineplot(x='tip', y='total_bill', data=tips, ax=ax)
ax.set_title('라인플랏')
ax.set_xlabel('Tip')
ax.set_ylabel('Total Bill')

Text(0, 0.5, 'Total Bill')

distplot¶

히스토그램 + 밀집도 그래프¶

밀집도 그래프 - 주어진 데이터를 정규화시켜 넓이가 1이 되도록 그린 그래프 

fig, 변수 = plt.subplots()

변수 = sns.distplot( 데이타프레임명[컬럼명] )

- 밀집도 그래프 제외, 히스토그램만 표시 
변수 = sns.distplot( 데이타프레임명[컬럼명], kde=False) 

- 밀집도 그래프 표시, 히스토그램 제외 
변수 = sns.distplot( 데이타프레임명[컬럼명], hist=False) 

변수.set_title('제목')

# Histogram of total_bill with the density curve overlaid.
# plt.subplots() returns a (Figure, Axes) pair, so unpack it instead of
# binding the whole tuple to `ax`.
# NOTE: sns.distplot is deprecated in seaborn 0.11+; on newer versions the
# replacement is sns.histplot(..., kde=True).
fig, ax = plt.subplots()
sns.distplot(tips['total_bill'], ax=ax)

# Variant: kde=False draws the histogram only
# sns.distplot(tips['total_bill'], kde=False, ax=ax)

# Variant: hist=False draws the density curve only
# sns.distplot(tips['total_bill'], hist=False, ax=ax)

ax.set_title('Total Bill Histogram')

Text(0.5, 1.0, 'Total Bill Histogram')

count Bar¶

범주형 데이터의 각 값이 나타난 횟수(빈도)를 막대로 나타낸 그래프¶

fig, 변수 = plt.subplots()
변수 = sns.countplot(x=컬럼명, data=데이타프레임변수)

변수.set_title(제목) 
변수.set_xlabel(X축 제목)
변수.set_ylabel(Y축 제목)

tips 데이타프레임에서 요일별 데이타값을 막대그래프로 표시하기¶

# Inspect the distinct values of the 'day' column

tips['day'].unique()

[Sun, Sat, Thur, Fri]
Categories (4, object): [Sun, Sat, Thur, Fri]

# Bar chart of how many rows of tips fall on each day of the week.
# plt.subplots() returns a (Figure, Axes) pair, so unpack it instead of
# binding the whole tuple to `ax`.
fig, ax = plt.subplots()
# Pass the column as x=...: newer seaborn releases take `data` as the first
# positional parameter, so a positional 'day' raises a TypeError there.
sns.countplot(x='day', data=tips, ax=ax)

ax.set_title('count of days')
ax.set_xlabel('day of week')
ax.set_ylabel('Frequency')

Text(0, 0.5, 'Frequency')

타이타닉호의 각 클래스별, 승객 수¶

# Number of Titanic passengers in each ticket class.
# The original cell was collapsed onto a single line; one statement per line.
fig, ax = plt.subplots()
sns.countplot(x="class", data=titanic, ax=ax)
ax.set_title("타이타닉호의 각 클래스별, 승객 수")
plt.show()

# Preview the first rows of the titanic DataFrame
titanic.head()

# Number of Titanic passengers of each sex.
# plt.subplots() returns a (Figure, Axes) pair, so unpack it instead of
# binding the whole tuple to `ax`.
fig, ax = plt.subplots()
sns.countplot(x="sex", data=titanic, ax=ax)
ax.set_title("타이타닉호의 각 성별, 승객 수")
plt.show()

# Number of Titanic passengers who died versus survived.
# plt.subplots() returns a (Figure, Axes) pair, so unpack it instead of
# binding the whole tuple to `ax`.
fig, ax = plt.subplots()
sns.countplot(x="survived", data=titanic, ax=ax)
ax.set_title("타이타닉호의 생존자와 사망자")

# Bar labels: the bars are ordered 0, 1 and survived == 0 means the passenger
# died (alive == 'no' in the dataset), so '사망자' (deceased) must come first
# and '생존자' (survivor) second.
ax.set_xticklabels(['사망자', '생존자'], rotation=45)

plt.show()

산점도 그래프¶

fig, 변수 = plt.subplots()

## fit_reg=True/False : 회귀선 표시 여부 

변수 = sns.regplot(x='컬럼명1', y='컬럼명2', data=데이타프레임명, fit_reg=True/False)

변수.set_title('제목') 
변수.set_xlabel('X축 제목')
변수.set_ylabel('Y축 제목')

# Scatter plot of tip against total_bill with a fitted regression line.
# plt.subplots() returns a (Figure, Axes) pair, so unpack it instead of
# binding the whole tuple to `ax`.
fig, ax = plt.subplots()
sns.regplot(x='total_bill', y='tip', data=tips, ax=ax)
# Variant: fit_reg=False hides the regression line
# sns.regplot(x='total_bill', y='tip', data=tips, fit_reg=False, ax=ax)

ax.set_title('Scatter of Total bill and Tip')  # plot title
ax.set_xlabel('Total Bill')
ax.set_ylabel('tip')

Text(0, 0.5, 'tip')

Boxplot 그래프¶

최솟값, 1분위수, 중간값, 3분위수, 최댓값, 이상치를 표시 

fig, 변수 = plt.subplots()
변수 =   sns.boxplot(x=컬럼명1, y=컬럼명2, data=데이타프레임)

변수.set_title('Graph Title') # 제목지정
변수.set_xlabel('xTitle') # X축 제목
변수.set_ylabel('yTitle') # Y축 제목

# Box plot of total_bill grouped by meal time.
# plt.subplots() returns a (Figure, Axes) pair, so unpack it instead of
# binding the whole tuple to `ax`.
fig, ax = plt.subplots()
sns.boxplot(x='time', y='total_bill', data=tips, ax=ax)
# The chart is a box plot, so the title says so (it previously read "Bar plot").
ax.set_title('Box plot of time and total_bill')  # plot title
ax.set_xlabel('time')  # x-axis label
ax.set_ylabel('total_bill')  # y-axis label

Text(0, 0.5, 'total_bill')

pairplot¶

pairplot은 데이터프레임을 인수로 받아 그리드(grid) 형태로 
  각 데이터 열의 조합에 대해 스캐터 플롯을 그린다.

pairplot: http://seaborn.pydata.org/generated/seaborn.pairplot.html


# pairplot은 자체적으로 Figure(그리드)를 생성하므로 plt.subplots()를 호출할 필요가 없다
변수 = sns.pairplot(데이타프레임, hue=컬럼명, markers=마커리스트)

# Preview the first rows of the iris DataFrame
iris.head()

# Pair plot of the iris columns; hue colours the points by species,
# with one marker shape per species.
sns.pairplot(
    data=iris,
    hue="species",
    palette="husl",
    markers=["o", "s", "D"],
)
plt.show()

	survived	pclass	sex	age	sibsp	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
0	0	3	male	22.0	1	7.2500	S	Third	man	True	NaN	Southampton	no	False
1	1	1	female	38.0	1	71.2833	C	First	woman	False	C	Cherbourg	yes	False
2	1	3	female	26.0	0	7.9250	S	Third	woman	False	NaN	Southampton	yes	True

	year	month	passengers
0	1949	January	112
1	1949	February	118
2	1949	March	132
3	1949	April	129
4	1949	May	121

	total_bill	tip	sex	smoker	day	time	size
0	16.99	1.01	Female	No	Sun	Dinner	2
1	10.34	1.66	Male	No	Sun	Dinner	3
2	21.01	3.50	Male	No	Sun	Dinner	3
3	23.68	3.31	Male	No	Sun	Dinner	2
4	24.59	3.61	Female	No	Sun	Dinner	4

	survived	pclass	sex	age	sibsp	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
0	0	3	male	22.0	1	7.2500	S	Third	man	True	NaN	Southampton	no	False
1	1	1	female	38.0	1	71.2833	C	First	woman	False	C	Cherbourg	yes	False
2	1	3	female	26.0	0	7.9250	S	Third	woman	False	NaN	Southampton	yes	True
3	1	1	female	35.0	1	53.1000	S	First	woman	False	C	Southampton	yes	False
4	0	3	male	35.0	0	8.0500	S	Third	man	True	NaN	Southampton	no	True

	sepal_length	sepal_width	petal_length	petal_width	species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa

BSH 코딩 블로그

시각화_01. seaborn

Seaborn¶

샘플 데이타프레임 불러오기¶

Line Plot¶

displot¶

히스토그램 + 밀집도 그래프¶

count Bar¶

특정 데이터값의 이산값을 나타낸 그래프¶

tips 데이타프레임에서 요일별 데이타값을 막대그래프로 표시하기¶

타이타닉호의 각 클래스별, 승객 수¶

산점도 그래프¶

Boxplot 그래프¶

pairplot¶

'Python > 시각화' 카테고리의 다른 글

티스토리툴바

시각화_05. 워드클라우드02 (0)	2019.08.26
시각화_04. 워드클라우드01 (0)	2019.08.26
시각화_03 (0)	2019.08.26
시각화_02. matplotlib (0)	2019.08.26

시각화_01. seaborn

Seaborn¶

샘플 데이타프레임 불러오기¶

Line Plot¶

displot¶

히스토그램 + 밀집도 그래프¶

count Bar¶

특정 데이터값의 이산값을 나타낸 그래프¶

tips 데이타프레임에서 요일별 데이타값을 막대그래프로 표시하기¶

타이타닉호의 각 클래스별, 승객 수¶

산점도 그래프¶

Boxplot 그래프¶

pairplot¶

'Python > 시각화' 카테고리의 다른 글

'Python/시각화' Related Articles

티스토리툴바