8000字 | Python数据可视化,完整版实操指南!
点击“数据管道”,选择“置顶/星标公众号”
福利干货,第一时间送达
1. 前言
![](https://filescdn.proginn.com/f1b259530c2be9c55da6b1014e96db62/bc33655ae68d5eb8e60c2c328e39430e.webp)
2. pandas
import pandas as pd
df = pd.read_csv('temporal.csv')
df.head(10) #View first 10 data rows
![](https://filescdn.proginn.com/3cfe0ba6874a952949924e55253d53dc/64ecbce566917565465e3a679f3878f1.webp)
df.describe()
![](https://filescdn.proginn.com/55239e064a501e60847ce604fcbf23d1/be3b4417b76448e554c5919fbe45c77f.webp)
df.info()
![](https://filescdn.proginn.com/e117128502affd9ab1e3a0040e24aff1/99c670eaa1009444c6c2c87b7e40eaab.webp)
![](https://filescdn.proginn.com/6cd5234f431988a1dda6493bcab66694/f145242e4979f91bfbf9b76019dbec67.webp)
pd.set_option('display.max_rows',500)
pd.set_option('display.max_columns',500)
pd.set_option('display.width',1000)
format_dict = {'data science':'${0:,.2f}', 'Mes':'{:%m-%Y}', 'machine learning':'{:.2%}'}
#We make sure that the Month column has datetime format
df['Mes'] = pd.to_datetime(df['Mes'])
#We apply the style to the visualization
df.head().style.format(format_dict)
format_dict = {'Mes':'{:%m-%Y}'} #Simplified format dictionary with values that do make sense for our data
df.head().style.format(format_dict).highlight_max(color='darkgreen').highlight_min(color='#ff0000')
![](https://filescdn.proginn.com/dd35d24afe5a759bbe6c9d74054ed1f8/24d665b498d6f9967e55afc08a8d1cf9.webp)
df.head(10).style.format(format_dict).background_gradient(subset=['data science', 'machine learning'], cmap='BuGn')
![](https://filescdn.proginn.com/59cfb28d99791b81fde109c3f8597bcd/a1d491a26b092fa9d903f35b04aa1092.webp)
df.head().style.format(format_dict).bar(color='red', subset=['data science', 'deep learning'])
![](https://filescdn.proginn.com/771c92108595383ae5e5fe125d0955ee/3cd46825e63286082891c4a9d6e8605e.webp)
# Combine two Styler effects on the first 10 rows: a 'BuGn' background
# gradient on two columns, followed by a yellow highlight of the maxima.
# (The original line used a full-width "。" instead of "." before
# highlight_max, which is a syntax error.)
(
    df.head(10)
    .style.format(format_dict)
    .background_gradient(subset=['data science', 'machine learning'], cmap='BuGn')
    .highlight_max(color='yellow')
)
![](https://filescdn.proginn.com/27e17d6cd65a379281a2c13620729118/51d48f39b5501a743a1b4ffa3aabc7c1.webp)
from pandas_profiling import ProfileReport
prof = ProfileReport(df)
prof.to_file(output_file='report.html')
![](https://filescdn.proginn.com/5b3a28de4ccc561d9e1d374cdbff87c5/ba43f96907a324db4958a52b4e643981.webp)
3. matplotlib
import matplotlib.pyplot as plt
plt.plot(df['Mes'], df['data science'], label='data science')
# The parameter label is to indicate the legend. This doesn't mean that it will be shown, we'll have to use another command that I'll explain later.
![](https://filescdn.proginn.com/deaa4e4226fe9e275d72ee59de6672cf/6913468a4a1162117a446fa2660d7b43.webp)
# Draw the three search-term series on the same axes. Each label matches
# its column name exactly (the original 'machine learning ' label carried
# a stray trailing space).
plt.plot(df['Mes'], df['data science'], label='data science')
plt.plot(df['Mes'], df['machine learning'], label='machine learning')
plt.plot(df['Mes'], df['deep learning'], label='deep learning')
![](https://filescdn.proginn.com/c1c53e22cc87886b43dbeb86f6f001ff/bb8dc8d5b9c7a8ad416de164a8de4cd5.webp)
# One line per search term, each labelled with its own column name.
for term in ('data science', 'machine learning', 'deep learning'):
    plt.plot(df['Mes'], df[term], label=term)

# Axis titles, chart title, grid, and finally the legend built from the labels.
plt.xlabel('Date')
plt.ylabel('Popularity')
plt.title('Popularity of AI terms by date')
plt.grid(True)
plt.legend()
![](https://filescdn.proginn.com/0416624a30b16ba9996eb5903cc7c379/5ed42cebe82177cfd5ad7b4b29a40578.webp)
# Build a 2x2 grid of axes and give each cell a readable name.
fig, axes = plt.subplots(2, 2)
(ax_hist, ax_scatter), (ax_ml, ax_dl) = axes

ax_hist.hist(df['data science'])                  # top-left: histogram
ax_scatter.scatter(df['Mes'], df['data science'])  # top-right: scatter over time
ax_ml.plot(df['Mes'], df['machine learning'])      # bottom-left: line
ax_dl.plot(df['Mes'], df['deep learning'])         # bottom-right: line
![](https://filescdn.proginn.com/6854157cf44227ad40d70af54c39ca6b/37ad8b3291bc81d4fd0901af46a3199f.webp)
plt.plot(df ['Mes'],df ['data science'],'r-')
plt.plot(df ['Mes'],df ['data science'] * 2,'bs')
plt .plot(df ['Mes'],df ['data science'] * 3,'g ^')
![](https://filescdn.proginn.com/823240945d75927bc6f80600091c628f/d21131be04cb823e22f29e67981dd2a6.webp)
plt.scatter(df['data science'], df['machine learning'])
![](https://filescdn.proginn.com/d86d228a94e86035ffd297666ba5cdeb/1d5393b9aff65b13cb0fe0e7a60a2631.webp)
plt.bar(df ['Mes'],df ['machine learning'],width = 20)
![](https://filescdn.proginn.com/0c49a870be497a66efb95695d7f0df42/b1442559ab1e72f4b367eef73d1f3d9d.webp)
plt.hist(df ['deep learning'],bins = 15)
![](https://filescdn.proginn.com/1323152599b9d4bcd30d3fd0bbac34e9/651e45c1083976884bc361e02d01c90e.webp)
# Same labelled line chart as before, now with free text and an arrow annotation.
plt.plot(df['Mes'], df['data science'], label='data science')
plt.plot(df['Mes'], df['machine learning'], label='machine learning')
plt.plot(df['Mes'], df['deep learning'], label='deep learning')
plt.xlabel('Date')
plt.ylabel('Popularity')
plt.title('Popularity of AI terms by date')
plt.grid(True)
# Coordinates use the same units as the graph.
plt.text(x='2010-01-01', y=80, s=r'$\lambda=1, r^2=0.8$')
# The original call was missing its closing parenthesis (syntax error).
plt.annotate(
    'Notice something?',
    xy=('2014-01-01', 30),        # point the arrow targets
    xytext=('2006-01-01', 50),    # where the text is placed
    arrowprops={'facecolor': 'red', 'shrink': 0.05},
)
![](https://filescdn.proginn.com/5dfd8dac24ec49aef3a494b5a3871a1d/a887769a111057219936363d562f6fd2.webp)
4. seaborn
import seaborn as sns
sns.set()
# Pass the vectors by keyword: recent seaborn releases accept x/y as
# keyword-only arguments, and the keyword form also works on older ones.
sns.scatterplot(x=df['Mes'], y=df['data science'])
![](https://filescdn.proginn.com/263b8ead8a9b341b47a4583d4ddd9dde/d11628da88add609f995287c53958b6b.webp)
sns.relplot(x='Mes', y='deep learning', hue='data science', size='machine learning', col='categorical', data=df)
![](https://filescdn.proginn.com/a4d3bf259ad15716056c46b95e99c225/6920e80f2dbcb0b1d440d2aaaa059922.webp)
# Correlation heatmap with each cell annotated to two decimals.
# - fmt was garbled as '。2f' (full-width period); the valid spec is '.2f'.
# - Only numeric columns are correlated, so non-numeric columns such as the
#   date and 'categorical' columns do not make corr() raise on newer pandas.
sns.heatmap(df.select_dtypes('number').corr(), annot=True, fmt='.2f')
![](https://filescdn.proginn.com/36cf5ce606d49b8915b72a25cf92784c/0de3b6ea75f6fa6c4fe4b9dfeaf20642.webp)
sns.pairplot(df)
![](https://filescdn.proginn.com/35f75f0a41f896d94c6a073952d712f8/a804bae067ea9bdfc70a4904b6203e76.webp)
sns.pairplot(df,hue ='categorical')
![](https://filescdn.proginn.com/9be1d687c0947a070bf3180392f85511/53587d3923bcc1b2dd513a59168b2964.webp)
sns.jointplot(x='data science', y='machine learning', data=df)
![](https://filescdn.proginn.com/7b353ce2ea8312e91945ff7b0475e970/03d49f8dcdc7b28048831683a2e594ca.webp)
sns.catplot(x='categorical', y='data science', kind='violin', data=df)
![](https://filescdn.proginn.com/d8ba2c1eb1817ca7dfc26e4b7fee508f/9134c4bc85d35a9ba9ab130e5b2f4ef2.webp)
# Two scatter plots side by side sharing the y axis, one per search term.
fig, axes = plt.subplots(1, 2, sharey=True, figsize=(8, 4))
panels = (
    ("deep learning", 'Deep Learning'),
    ("machine learning", 'Machine Learning'),
)
for axis, (column, heading) in zip(axes, panels):
    sns.scatterplot(x="Mes", y=column, hue="categorical", data=df, ax=axis)
    axis.set_title(heading)
![](https://filescdn.proginn.com/8ab925da4eb5c7836f0aaffc4e2b2571/2bd3cb2ff875e29d0ac5efbc639af51f.webp)
5. Bokeh
from bokeh.plotting import figure, output_file, save
# Render a single line chart to a standalone HTML file.
output_file('data_science_popularity.html')
p = figure(title='data science', x_axis_label='Mes', y_axis_label='data science')
# `legend=` was deprecated and later removed from glyph methods;
# `legend_label=` is the replacement for a fixed legend entry.
p.line(df['Mes'], df['data science'], legend_label='popularity', line_width=2)
save(p)
![](https://filescdn.proginn.com/401b856fb3ae471a12016046c995a158/a4c4409d9f2051da4c4ce8d82e33123c.webp)
# gridplot was used without ever being imported; bring it into scope here.
from bokeh.layouts import gridplot

# Three linked scatter plots laid out in one row and saved to one HTML file.
output_file('multiple_graphs.html')

# s1 now uses `height=` like s2/s3 (the original mixed `width=` with `plot_height=`).
s1 = figure(width=250, height=250, title='data science')
s1.circle(df['Mes'], df['data science'], size=10, color='navy', alpha=0.5)

# Share both axis ranges with s1.
s2 = figure(width=250, height=250, x_range=s1.x_range, y_range=s1.y_range, title='machine learning')
s2.triangle(df['Mes'], df['machine learning'], size=10, color='red', alpha=0.5)

# Share only the x axis range with s1.
s3 = figure(width=250, height=250, x_range=s1.x_range, title='deep learning')
s3.square(df['Mes'], df['deep learning'], size=5, color='green', alpha=0.5)

p = gridplot([[s1, s2, s3]])
save(p)
![](https://filescdn.proginn.com/731f2610c3204b35d9d841d219ded857/30814cccf016a10f05b62b2ff00d2bb7.webp)
6. altair
7. folium
import folium
m1 = folium.Map(location=[41.38, 2.17], tiles='openstreetmap', zoom_start=18)
m1.save('map1.html')
![](https://filescdn.proginn.com/18e412cce544a9d14b3cb689a0628168/4c25656229069f8da929f1e9d57612a3.webp)
# Map with two markers; each marker has an HTML popup and a hover tooltip.
m2 = folium.Map(location=[41.38, 2.17], tiles='openstreetmap', zoom_start=16)
marker_specs = [
    ([41.38, 2.176], '<i>You can use whatever HTML code you want</i>', 'click here'),
    ([41.38, 2.174], '<b>You can use whatever HTML code you want</b>', 'dont click here'),
]
for position, popup_html, hover_text in marker_specs:
    folium.Marker(position, popup=popup_html, tooltip=hover_text).add_to(m2)
m2.save('map2.html')
![](https://filescdn.proginn.com/80e2b5be5f6678e61022c7365372cfac/941db52c8109dcd0748fa94231d3d4d9.webp)
from geopandas.tools import geocode

# Load the per-country data and discard rows with missing values.
df2 = pd.read_csv('mapa.csv').dropna(axis=0)

# Geocode each country name. It may take a while because it downloads a lot of data.
df2['geometry'] = geocode(df2['País'], provider='nominatim')['geometry']

# Split each geometry into separate latitude (y) and longitude (x) columns.
df2['Latitude'] = df2['geometry'].apply(lambda point: point.y)
df2['Longitude'] = df2['geometry'].apply(lambda point: point.x)
![](https://filescdn.proginn.com/0642e363c077ff53e2c89745e530d6d6/9a6392d4e57f1a1291a274cbaabcaffa.webp)
m3 = folium.Map(location=[39.326234,-4.838065], tiles='openstreetmap', zoom_start=3)
def color_producer(val):
    """Return the circle colour for a popularity value.

    Args:
        val: Numeric value to classify.

    Returns:
        'red' when ``val`` is 50 or less, otherwise 'green'.
    """
    return 'red' if val <= 50 else 'green'
# Draw one circle per row: radius scales with the 'data science' value and
# the colour comes from color_producer. The coordinate columns are the
# 'Latitude'/'Longitude' ones created during geocoding (the original read
# 'Latitud'/'Longitud', which are never created in this walkthrough).
for _, row in df2.iterrows():
    folium.Circle(
        location=[row['Latitude'], row['Longitude']],
        radius=5000 * row['data science'],
        color=color_producer(row['data science']),
    ).add_to(m3)
m3.save('map3.html')
![](https://filescdn.proginn.com/bc0d304ecd7428cd05eb6d84d4c6b846/7c75a54cb92e2b96740c4982c5ec2802.webp)
![](https://filescdn.proginn.com/0077cffabde56b25657bff38be34cd2e/3ffaeffa7155cbcc025829adaa9bac8e.webp)
推荐阅读
欢迎长按扫码关注「数据管道」
评论