博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
pandas操作总结
阅读量:3933 次
发布时间:2019-05-23

本文共 23080 字,大约阅读时间需要 76 分钟。

import pandas as pd
# 1查看pandas版本信息pd.__version__
'0.24.2'
# 创建 Series 数据类型# Pandas 中,Series 可以被看作由 1 列数据组成的数据集。# 创建 Series 语法:s = pd.Series(data, index=index),可以通过多种方式进行创建,以下介绍了 3 个常用方法。
# 3.从列表创建Seriesarr = [1,2,3,4]s1 = pd.Series(arr)  # 若没有指定索引,默认从0开始s1
0    11    22    33    4dtype: int64
# 4 .从Ndarray创建Seriesimport numpy as npn = np.random.randn(5) # 随机生成一个数组index = ['a','b','c','d','e']  # 指定了索引s2 = pd.Series(n, index=index)s2
a   -0.583111b   -0.466115c    0.542662d   -0.745683e   -0.529050dtype: float64
# 5.从字典创建Seriesdic = {
'a':1,'b':2,'c':3,'d':4,'e':5}s3 = pd.Series(dic)s3
a    1b    2c    3d    4e    5dtype: int64
# 6. 修改Series索引print(s1)s1.index=['A','B','C','D']s1
0    11    22    33    4dtype: int64A    1B    2C    3D    4dtype: int64
# 7. Vertical concatenation: stack s1 underneath s3.
# Series.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat gives the same result on every pandas version.
s4 = pd.concat([s3, s1])
s4
a    1b    2c    3d    4e    5A    1B    2C    3D    4dtype: int64
print(s4)s4 = s4.drop('e')  # 删除索引为e的值s4
a    1b    2c    3d    4e    5A    1B    2C    3D    4dtype: int64a    1b    2c    3d    4A    1B    2C    3D    4dtype: int64
# 8.Series按指定索引修改元素s4['A'] = 100s4
a      1b      2c      3d      4A    100B      2C      3D      4dtype: int64
# 9.按指定索引查找元素s4['B']
2
# 10 Series切片操作s4[:3] #对s4前三个数据访问
a    1b    2c    3dtype: int64
# 11 加法运算  Series 的加法运算是按照索引计算,如果索引不同则填充为 NaN(空值)。s4.add(s3)
A    NaNB    NaNC    NaND    NaNa    2.0b    4.0c    6.0d    8.0e    NaNdtype: float64
# 13.减法亦是s4.sub(s3)
A    NaNB    NaNC    NaND    NaNa    0.0b    0.0c    0.0d    0.0e    NaNdtype: float64
# 14.乘法s4.mul(s3)
A     NaNB     NaNC     NaND     NaNa     1.0b     4.0c     9.0d    16.0e     NaNdtype: float64
# 15  除法s4.div(s3)
A    NaNB    NaNC    NaND    NaNa    1.0b    1.0c    1.0d    1.0e    NaNdtype: float64
# 16.求中位数print(s4)s4.median()
a      1b      2c      3d      4A    100B      2C      3D      4dtype: int643.0
# 17.求和s4.sum()
119
# 18.19.最大最小值print(s4.max())s4.min()
1001
# 创建 DataFrame 数据类型
# 与 Series 不同,DataFrame 可以存在多列数据。一般情况下,DataFrame 也更加常用。
# 20. 通过 NumPy 数组创建 DataFramedates = pd.date_range('today', periods=6)  # 定义时间序列作为indexnumbers = np.random.randn(6, 4)columns = ['A','B','C','D']df1 = pd.DataFrame(numbers, index=dates, columns=columns)df1
A B C D
2019-07-16 09:59:10.131414 1.536536 -1.598355 -2.354828 -1.151150
2019-07-17 09:59:10.131414 0.758288 0.143739 -0.389704 0.369642
2019-07-18 09:59:10.131414 -0.612505 0.752261 0.243023 -0.110990
2019-07-19 09:59:10.131414 0.130843 1.308658 0.765599 0.892070
2019-07-20 09:59:10.131414 1.220489 -0.415430 -0.878169 -0.215298
2019-07-21 09:59:10.131414 -0.098756 -2.210043 0.376714 0.521180
# 21.通过字典数组创建DataFramedata = {
'animal':['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'], 'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3], 'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1], 'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']df2 = pd.DataFrame(data, index=labels)df2
animal age visits priority
a cat 2.5 1 yes
b cat 3.0 3 yes
c snake 0.5 2 no
d dog NaN 3 yes
e dog 5.0 2 no
f cat 2.0 3 no
g snake 4.5 1 no
h cat NaN 1 yes
i dog 7.0 2 no
j dog 3.0 1 no
#### 22. 查看 DataFrame 的数据类型df2.dtypes
animal       objectage         float64visits        int64priority     objectdtype: object
# 23. 预览 DataFrame 的前 5 行数据df2.head() # 默认前5
animal age visits priority
a cat 2.5 1 yes
b cat 3.0 3 yes
c snake 0.5 2 no
d dog NaN 3 yes
e dog 5.0 2 no
# 24. 查看 DataFrame 的后 3 行数据df2.tail(3)
animal age visits priority
h cat NaN 1 yes
i dog 7.0 2 no
j dog 3.0 1 no
# 25.查看索引df2.index
Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'], dtype='object')
# 26.查看列名df2.columns
Index(['animal', 'age', 'visits', 'priority'], dtype='object')
# 27. 查看数值df2.values
array([['cat', 2.5, 1, 'yes'],       ['cat', 3.0, 3, 'yes'],       ['snake', 0.5, 2, 'no'],       ['dog', nan, 3, 'yes'],       ['dog', 5.0, 2, 'no'],       ['cat', 2.0, 3, 'no'],       ['snake', 4.5, 1, 'no'],       ['cat', nan, 1, 'yes'],       ['dog', 7.0, 2, 'no'],       ['dog', 3.0, 1, 'no']], dtype=object)
# 28.查看统计数据df2.describe()
age visits
count 8.000000 10.000000
mean 3.437500 1.900000
std 2.007797 0.875595
min 0.500000 1.000000
25% 2.375000 1.000000
50% 3.000000 2.000000
75% 4.625000 2.750000
max 7.000000 3.000000
# 29.转置操作df2.T
a b c d e f g h i j
animal cat cat snake dog dog cat snake cat dog dog
age 2.5 3 0.5 NaN 5 2 4.5 NaN 7 3
visits 1 3 2 3 2 3 1 1 2 1
priority yes yes no yes no no no yes no no
# 30.按列排序df2.sort_values(by='age', ascending=True)  # 默认升序
animal age visits priority
c snake 0.5 2 no
f cat 2.0 3 no
a cat 2.5 1 yes
b cat 3.0 3 yes
j dog 3.0 1 no
g snake 4.5 1 no
e dog 5.0 2 no
i dog 7.0 2 no
d dog NaN 3 yes
h cat NaN 1 yes
# 31.对DataFrame数据切片df2[1:3]
animal age visits priority
b cat 3.0 3 yes
c snake 0.5 2 no
df2['age']  # 32.单列查询
a    2.5b    3.0c    0.5d    NaNe    5.0f    2.0g    4.5h    NaNi    7.0j    3.0Name: age, dtype: float64
df2.age
a    2.5b    3.0c    0.5d    NaNe    5.0f    2.0g    4.5h    NaNi    7.0j    3.0Name: age, dtype: float64
# 33.多列查询df2[['age','animal']]
age animal
a 2.5 cat
b 3.0 cat
c 0.5 snake
d NaN dog
e 5.0 dog
f 2.0 cat
g 4.5 snake
h NaN cat
i 7.0 dog
j 3.0 dog
# 34.通过位置查询df2.iloc[1:3]
animal age visits priority
b cat 3.0 3 yes
c snake 0.5 2 no
df2.loc['c','age']
0.5
df2.loc[:'f',['age','animal']]
age animal
a 2.5 cat
b 3.0 cat
c 0.5 snake
d NaN dog
e 5.0 dog
f 2.0 cat
# 35.副本拷贝df3 = df2.copy()df3
animal age visits priority
a cat 2.5 1 yes
b cat 3.0 3 yes
c snake 0.5 2 no
d dog NaN 3 yes
e dog 5.0 2 no
f cat 2.0 3 no
g snake 4.5 1 no
h cat NaN 1 yes
i dog 7.0 2 no
j dog 3.0 1 no
# 36.判断是否元素为空df3.isnull()  # 空返回True
animal age visits priority
a False False False False
b False False False False
c False False False False
d False True False False
e False False False False
f False False False False
g False False False False
h False True False False
i False False False False
j False False False False
# 37.添加列num= pd.Series([1,2,3,4,5,6,7,8,9,10], index=df3.index)df3['No.'] = numdf3
animal age visits priority No.
a cat 2.5 1 yes 1
b cat 3.0 3 yes 2
c snake 0.5 2 no 3
d dog NaN 3 yes 4
e dog 5.0 2 no 5
f cat 2.0 3 no 6
g snake 4.5 1 no 7
h cat NaN 1 yes 8
i dog 7.0 2 no 9
j dog 3.0 1 no 10
# 39.通过DataFrame的标签对数据进行修改df3.loc['f','age'] = 1.5df3
animal age visits priority No.
a cat 2.5 1 yes 1
b cat 3.0 3 yes 2
c snake 0.5 2 no 3
d dog NaN 3 yes 4
e dog 5.0 2 no 5
f cat 1.5 3 no 6
g snake 4.5 1 no 7
h cat NaN 1 yes 8
i dog 7.0 2 no 9
j dog 3.0 1 no 10
# 40. Column means of the numeric columns.
# numeric_only=True skips the string columns ('animal', 'priority'); since
# pandas 2.0 a bare df3.mean() raises TypeError on them instead of silently
# dropping them.
df3.mean(numeric_only=True)
age       3.375visits    1.900No.       5.500dtype: float64
# 41.对任意列求和df3['visits'].sum()
19
# 42 将字符串转换为小写字母string = pd.Series([    'A','B','asDS',np.nan])print(string)string.str.lower()
0       A1       B2    asDS3     NaNdtype: object0       a1       b2    asds3     NaNdtype: object
# 43.转化为大写string.str.upper()
0       A1       B2    ASDS3     NaNdtype: object
# 44.对缺失值进行填充df4 = df3.copy()print(df4)df4.fillna(value=3)
animal  age  visits priority  No.a    cat  2.5       1      yes    1b    cat  3.0       3      yes    2c  snake  0.5       2       no    3d    dog  NaN       3      yes    4e    dog  5.0       2       no    5f    cat  1.5       3       no    6g  snake  4.5       1       no    7h    cat  NaN       1      yes    8i    dog  7.0       2       no    9j    dog  3.0       1       no   10
animal age visits priority No.
a cat 2.5 1 yes 1
b cat 3.0 3 yes 2
c snake 0.5 2 no 3
d dog 3.0 3 yes 4
e dog 5.0 2 no 5
f cat 1.5 3 no 6
g snake 4.5 1 no 7
h cat 3.0 1 yes 8
i dog 7.0 2 no 9
j dog 3.0 1 no 10
# 45.删除存在缺失值的行df5 = df3.copy()print(df5)df5.dropna(how='any')  # 任何存在nan行的都将被删掉
animal  age  visits priority  No.a    cat  2.5       1      yes    1b    cat  3.0       3      yes    2c  snake  0.5       2       no    3d    dog  NaN       3      yes    4e    dog  5.0       2       no    5f    cat  1.5       3       no    6g  snake  4.5       1       no    7h    cat  NaN       1      yes    8i    dog  7.0       2       no    9j    dog  3.0       1       no   10
animal age visits priority No.
a cat 2.5 1 yes 1
b cat 3.0 3 yes 2
c snake 0.5 2 no 3
e dog 5.0 2 no 5
f cat 1.5 3 no 6
g snake 4.5 1 no 7
i dog 7.0 2 no 9
j dog 3.0 1 no 10
# 46. Join two frames on a shared key column.
l = pd.DataFrame({'key': ['foo1', 'foo2'], 'one': [1, 2]})
r = pd.DataFrame({'key': ['foo2', 'foo1'], 'two': [4, 5]})
print(l)
print(r)
# Inner join on 'key': both foo1 and foo2 occur in each frame, so the result
# keeps two rows, ordered by the keys of the left frame.
pd.merge(l, r, on='key')
key  one0  foo1    11  foo2    2    key  two0  foo2    41  foo1    5
key one two
0 foo1 1 5
1 foo2 2 4
# 51. Build a Series indexed by every day of 2019 whose values are random numbers.
dti = pd.date_range(start='20190101', end='20191231', freq="D")
s = pd.Series(np.random.rand(len(dti)), index=dti)
s
2019-01-01    0.0376382019-01-02    0.1468352019-01-03    0.6300112019-01-04    0.2253522019-01-05    0.5494222019-01-06    0.1361732019-01-07    0.9760752019-01-08    0.5818662019-01-09    0.6674772019-01-10    0.6164542019-01-11    0.6250502019-01-12    0.1314152019-01-13    0.5588832019-01-14    0.7492712019-01-15    0.6764462019-01-16    0.0841042019-01-17    0.0730562019-01-18    0.2321862019-01-19    0.2133572019-01-20    0.4576642019-01-21    0.5383372019-01-22    0.7284272019-01-23    0.8993022019-01-24    0.8506092019-01-25    0.7165022019-01-26    0.3193392019-01-27    0.5774552019-01-28    0.1269912019-01-29    0.5274392019-01-30    0.551891                ...   2019-12-02    0.2465042019-12-03    0.1175492019-12-04    0.2239772019-12-05    0.4071092019-12-06    0.9538202019-12-07    0.5839622019-12-08    0.0147352019-12-09    0.0091902019-12-10    0.4963302019-12-11    0.1919812019-12-12    0.0029352019-12-13    0.5301972019-12-14    0.3288302019-12-15    0.0816382019-12-16    0.9222512019-12-17    0.3323892019-12-18    0.0765672019-12-19    0.9062162019-12-20    0.4813112019-12-21    0.0804052019-12-22    0.2915322019-12-23    0.9334242019-12-24    0.4397712019-12-25    0.7385652019-12-26    0.2154012019-12-27    0.8496872019-12-28    0.8610602019-12-29    0.8310742019-12-30    0.9443072019-12-31    0.245717Freq: D, Length: 365, dtype: float64
# 52.统计s中每一个周三对应值的和s[s.index.weekday == 2].sum()  # 周一从0开始
28.54901665149845
# 53.统计s中每个月的平均值s.resample('M').mean()
2019-01-31    0.4716942019-02-28    0.5861592019-03-31    0.5152262019-04-30    0.5301702019-05-31    0.4811622019-06-30    0.5455772019-07-31    0.5476922019-08-31    0.4951582019-09-30    0.5471092019-10-31    0.5447062019-11-30    0.4563122019-12-31    0.434590Freq: M, dtype: float64
# 66.条件查找data = {
'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'], 'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3], 'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1], 'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']df = pd.DataFrame(data, index=labels)df[df['age']>3]
animal age visits priority
e dog 5.0 2 no
g snake 4.5 1 no
i dog 7.0 2 no
# 68.多重条件查询
df = pd.DataFrame(data, index=labels)df[(df['animal']=='cat')&(df['age']<3)]
animal age visits priority
a cat 2.5 1 yes
f cat 2.0 3 no
# 64.按关键字查询df3[df3['animal'].isin(['cat','dog'])]
animal age visits priority No.
a cat 2.5 1 yes 1
b cat 3.0 3 yes 2
d dog NaN 3 yes 4
e dog 5.0 2 no 5
f cat 1.5 3 no 6
h cat NaN 1 yes 8
i dog 7.0 2 no 9
j dog 3.0 1 no 10
# 70.按标签名及列名查询df.loc[df2.index[[3,4,8]],['animal','age']]
animal age
d dog NaN
e dog 5.0
i dog 7.0
# 71多条件排序df.sort_values(by=['age','visits'], ascending=[False, True])  # age降序,
animal age visits priority
i dog 7.0 2 no
e dog 5.0 2 no
g snake 4.5 1 no
b cat 3.0 3 yes
j dog 3.0 1 no
a cat 2.5 1 yes
f cat 2.0 3 no
c snake 0.5 2 no
d dog NaN 3 yes
h cat NaN 1 yes
# 73分组求和df4.groupby(by='animal').sum()
age visits No.
animal
cat 7.0 8 17
dog 15.0 8 28
snake 5.0 3 10
# 数据清洗
# 88.缺失值拟合
# 在 `FlightNumber` 中有数值缺失,其中数值为按 10 增长,补充相应的缺省值使得数据完整,并让数据为 `int` 类型。
# Sample flight data with deliberately messy columns for the cleaning steps:
# mixed-case place names joined by '_', missing flight numbers (NaN),
# variable-length delay lists, and airline names wrapped in stray characters.
df = pd.DataFrame({
    'From_To': ['LoNDon_paris', 'MAdrid_miLAN', 'londON_StockhOlm',
                'Budapest_PaRis', 'Brussels_londOn'],
    'FlightNumber': [10045, np.nan, 10065, np.nan, 10085],
    'RecentDelays': [[23, 47], [], [24, 43, 87], [13], [67, 32]],
    'Airline': ['KLM(!)', '<Air France> (12)', '(British Airways. )',
                '12. Air France', '"Swiss Air"'],
})
df
From_To FlightNumber RecentDelays Airline
0 LoNDon_paris 10045.0 [23, 47] KLM(!)
1 MAdrid_miLAN NaN [] <Air France> (12)
2 londON_StockhOlm 10065.0 [24, 43, 87] (British Airways. )
3 Budapest_PaRis NaN [13] 12. Air France
4 Brussels_londOn 10085.0 [67, 32] "Swiss Air"
df['FlightNumber'] = df['FlightNumber'].interpolate().astype(int)
df
From_To FlightNumber RecentDelays Airline
0 LoNDon_paris 10045 [23, 47] KLM(!)
1 MAdrid_miLAN 10055 [] <Air France> (12)
2 londON_StockhOlm 10065 [24, 43, 87] (British Airways. )
3 Budapest_PaRis 10075 [13] 12. Air France
4 Brussels_londOn 10085 [67, 32] "Swiss Air"
# 89. 数据列拆分# 其中From_to应该为两独立的两列From和To,将From_to依照_拆分为独立两列建立为一个新表。temp = df.From_To.str.split('_',expand=True)temp.columns = ['From','To']temp
From To
0 LoNDon paris
1 MAdrid miLAN
2 londON StockhOlm
3 Budapest PaRis
4 Brussels londOn
# 90. 字符标准化# 其中注意到地点的名字都不规范(如:londON应该为London)需要对数据进行标准化处理。temp['From'] = temp['From'].str.capitalize()temp['To'] = temp['To'].str.capitalize()
temp
From To
0 London Paris
1 Madrid Milan
2 London Stockholm
3 Budapest Paris
4 Brussels London
# 91. 删除坏数据加入整理好的数据# 将最开始的 From_to 列删除,加入整理好的 From 和 to 列。df = df.drop('From_To', axis=1)df = df.join(temp)df
FlightNumber RecentDelays Airline From To
0 10045 [23, 47] KLM(!) London Paris
1 10055 [] <Air France> (12) Madrid Milan
2 10065 [24, 43, 87] (British Airways. ) London Stockholm
3 10075 [13] 12. Air France Budapest Paris
4 10085 [67, 32] "Swiss Air" Brussels London
# 92. Strip stray characters from the Airline column.
# Keep only the first run of letters/whitespace in each value, then trim the ends.
# The pattern is a raw string so that \s reaches the regex engine unchanged
# and does not trigger Python's invalid-escape-sequence warning.
df['Airline'] = df['Airline'].str.extract(r'([a-zA-Z\s]+)', expand=False).str.strip()
df
FlightNumber RecentDelays Airline From To
0 10045 [23, 47] KLM London Paris
1 10055 [] Air France Madrid Milan
2 10065 [24, 43, 87] British Airways London Stockholm
3 10075 [13] Air France Budapest Paris
4 10085 [67, 32] Swiss Air Brussels London
# 93. 格式规范# 在 RecentDelays 中记录的方式为列表类型,由于其长度不一,这会为后期数据分析造成很大麻烦。# 这里将 RecentDelays 的列表拆开,取出列表中的相同位置元素作为一列,若为空值即用 NaN 代替。delays = df['RecentDelays'].apply(pd.Series)delays.columns = ['delay_{}'.format(n) for n in range(1, len(delays.columns)+1)]df = df.drop('RecentDelays', axis=1).join(delays)df
FlightNumber Airline From To delay_1 delay_2 delay_3
0 10045 KLM London Paris 23.0 47.0 NaN
1 10055 Air France Madrid Milan NaN NaN NaN
2 10065 British Airways London Stockholm 24.0 43.0 87.0
3 10075 Air France Budapest Paris 13.0 NaN NaN
4 10085 Swiss Air Brussels London 67.0 32.0 NaN
# Data preprocessing
# 94. Binning: map each grade to a pass/fail flag.
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Candy', 'Dany', 'Ella', 'Frank', 'Grace', 'Jenny'],
    'grades': [58, 83, 79, 65, 93, 45, 61, 88],
})


def choice(x, threshold=60):
    """Return 1 if *x* is strictly greater than *threshold*, otherwise 0."""
    return 1 if x > threshold else 0


# Series.map applies choice element-wise and keeps the frame's own index, so
# the result lines up with the rows whatever index the frame uses.
df['grades'] = df['grades'].map(choice)
df
name grades
0 Alice 0
1 Bob 1
2 Candy 1
3 Dany 1
4 Ella 1
5 Frank 0
6 Grace 1
7 Jenny 1
# 95. De-duplication of consecutive repeats.
df = pd.DataFrame({'A': [1, 2, 3, 4, 5, 4, 4, 57, 8]})
# shift() moves every value down one row, so the comparison is True wherever a
# row differs from the row directly above it. Only adjacent repeats are
# removed (the second 4, at index 6); the non-adjacent 4 at index 5 stays.
df.loc[df['A'].shift() != df['A']]
A
0 1
1 2
2 3
3 4
4 5
5 4
7 57
8 8
# 96. 数据归一化
# 有时候,DataFrame 中不同列之间的数据差距太大,需要对其进行归一化处理。
# 其中,Max-Min 归一化是简单而常见的一种方式,公式如下:
# $Y = \dfrac{X - X_{\min}}{X_{\max} - X_{\min}}$
def normalization(df):
    """Min-max scale every column of *df* into the range [0, 1].

    Each column is transformed as (x - column_min) / (column_max - column_min).
    A column whose values are all equal has a zero range, so its result is
    NaN (0 / 0).
    """
    col_min = df.min()
    col_range = df.max().sub(col_min)
    return df.sub(col_min).div(col_range)


df = pd.DataFrame(np.random.random(size=(5, 3)))
print(df)
normalization(df)
0         1         20  0.920675  0.181496  0.4081791  0.016837  0.740842  0.2396252  0.577404  0.503003  0.0774013  0.502584  0.262550  0.0008484  0.817712  0.774605  0.073925
0 1 2
0 1.000000 0.000000 1.000000
1 0.000000 0.943074 0.586199
2 0.620207 0.542072 0.187938
3 0.537427 0.136659 0.000000
4 0.886083 1.000000 0.179404
# 97. Series 可视化%matplotlib inline
ts = pd.Series(np.random.randn(100), index=pd.date_range('today', periods=100))ts = ts.cumsum()print(ts)ts.plot()
2019-07-16 11:14:32.969237    -0.1605272019-07-17 11:14:32.969237    -0.4135022019-07-18 11:14:32.969237     0.4949392019-07-19 11:14:32.969237    -0.1783432019-07-20 11:14:32.969237    -1.2798422019-07-21 11:14:32.969237    -0.5389812019-07-22 11:14:32.969237    -1.9527032019-07-23 11:14:32.969237    -2.3508312019-07-24 11:14:32.969237    -2.6524192019-07-25 11:14:32.969237    -4.9768562019-07-26 11:14:32.969237    -5.5969932019-07-27 11:14:32.969237    -4.8806972019-07-28 11:14:32.969237    -5.9182252019-07-29 11:14:32.969237    -4.7202132019-07-30 11:14:32.969237    -4.0562082019-07-31 11:14:32.969237    -3.5266402019-08-01 11:14:32.969237    -2.2955202019-08-02 11:14:32.969237    -0.3818502019-08-03 11:14:32.969237    -0.0779562019-08-04 11:14:32.969237     0.4418312019-08-05 11:14:32.969237    -1.6246912019-08-06 11:14:32.969237    -1.0843162019-08-07 11:14:32.969237    -2.1341242019-08-08 11:14:32.969237    -1.4773982019-08-09 11:14:32.969237    -2.2991942019-08-10 11:14:32.969237    -2.5016632019-08-11 11:14:32.969237    -3.1907932019-08-12 11:14:32.969237    -4.2370492019-08-13 11:14:32.969237    -4.4772302019-08-14 11:14:32.969237    -4.171017                                ...    
2019-09-24 11:14:32.969237   -13.5697302019-09-25 11:14:32.969237   -14.6271882019-09-26 11:14:32.969237   -15.4616382019-09-27 11:14:32.969237   -16.1215602019-09-28 11:14:32.969237   -16.5695112019-09-29 11:14:32.969237   -17.9008422019-09-30 11:14:32.969237   -19.1940012019-10-01 11:14:32.969237   -17.9792932019-10-02 11:14:32.969237   -18.6459032019-10-03 11:14:32.969237   -19.2413672019-10-04 11:14:32.969237   -19.2113652019-10-05 11:14:32.969237   -18.0884192019-10-06 11:14:32.969237   -17.7679762019-10-07 11:14:32.969237   -16.2738832019-10-08 11:14:32.969237   -16.7518122019-10-09 11:14:32.969237   -16.4604682019-10-10 11:14:32.969237   -15.5345142019-10-11 11:14:32.969237   -16.0292532019-10-12 11:14:32.969237   -16.6299952019-10-13 11:14:32.969237   -17.1817342019-10-14 11:14:32.969237   -16.1395462019-10-15 11:14:32.969237   -16.2494242019-10-16 11:14:32.969237   -14.7977192019-10-17 11:14:32.969237   -17.1985462019-10-18 11:14:32.969237   -18.1938872019-10-19 11:14:32.969237   -18.1758412019-10-20 11:14:32.969237   -18.0390032019-10-21 11:14:32.969237   -17.8848382019-10-22 11:14:32.969237   -18.9857602019-10-23 11:14:32.969237   -18.987684Freq: D, Length: 100, dtype: float64

在这里插入图片描述

# 98. DataFrame 折线图df = pd.DataFrame(np.random.randn(100, 4), index=ts.index, columns=['A','B','C','D'])df =df.cumsum()print(df)df.plot()
A          B         C         D2019-07-16 11:14:32.969237 -2.311551  -2.601142  0.852766  0.7668992019-07-17 11:14:32.969237 -0.879667  -4.293468 -0.039314  0.8228822019-07-18 11:14:32.969237 -1.249910  -5.562160 -0.456214  0.7208132019-07-19 11:14:32.969237 -0.567523  -5.869549 -1.250540  1.2048542019-07-20 11:14:32.969237  0.000393  -3.939871 -1.824283  1.3779182019-07-21 11:14:32.969237 -1.957763  -4.426390 -1.644319  0.4119902019-07-22 11:14:32.969237 -1.863936  -5.952407 -0.678510  0.8828742019-07-23 11:14:32.969237 -2.047160  -6.771213  1.407736  1.7570212019-07-24 11:14:32.969237 -2.230326  -6.520421  3.122783  2.9760792019-07-25 11:14:32.969237 -3.833992  -6.785455  2.087702  4.0750222019-07-26 11:14:32.969237 -4.315307  -8.567182  2.688330  5.3659912019-07-27 11:14:32.969237 -5.248594  -8.344775  3.382635  4.2149692019-07-28 11:14:32.969237 -5.054369  -7.385112  3.765415  5.0666372019-07-29 11:14:32.969237 -2.931733  -7.085015  3.746368  5.7564382019-07-30 11:14:32.969237 -4.190044  -7.517056  3.133894  8.2179032019-07-31 11:14:32.969237 -3.139043  -8.779127  2.402586  7.8600252019-08-01 11:14:32.969237 -1.870986  -8.921735  2.442751  7.9568242019-08-02 11:14:32.969237 -1.947051  -9.726026  2.805189  8.7300092019-08-03 11:14:32.969237 -2.468689  -7.685965  2.295436  6.7956882019-08-04 11:14:32.969237 -2.138392  -7.481845  3.769528  7.0188162019-08-05 11:14:32.969237 -1.521903  -5.906005  2.340666  7.2808662019-08-06 11:14:32.969237 -0.851497  -5.947501  4.279168  6.2295892019-08-07 11:14:32.969237 -0.745985  -6.307143  5.847261  5.6307052019-08-08 11:14:32.969237 -0.459598  -5.138792  4.995194  5.6479152019-08-09 11:14:32.969237 -0.324185  -5.226607  3.466786  4.2925912019-08-10 11:14:32.969237 -0.352415  -5.121374  3.401821  4.9661652019-08-11 11:14:32.969237  1.123371  -4.678556  2.997400  4.7304022019-08-12 11:14:32.969237  1.621475  -4.918931  1.978229  5.8918172019-08-13 11:14:32.969237  0.528799  -4.923886  1.741921  4.0914292019-08-14 
11:14:32.969237  0.234260  -6.577139  3.515839  3.965522...                              ...        ...       ...       ...2019-09-24 11:14:32.969237  2.098830  11.540368 -2.760031  2.0170742019-09-25 11:14:32.969237  1.917497  11.425361 -2.360769  1.5402592019-09-26 11:14:32.969237  1.586440  11.089945 -2.934906  2.0169882019-09-27 11:14:32.969237  2.426352  11.000135 -4.160570  1.6784622019-09-28 11:14:32.969237  2.590117  11.409677 -5.102951  3.1237962019-09-29 11:14:32.969237  2.586017  11.673688 -5.936028  2.1597312019-09-30 11:14:32.969237  5.012078  12.535448 -6.913949  4.0820582019-10-01 11:14:32.969237  3.529943  14.612272 -6.541449  3.1304292019-10-02 11:14:32.969237  3.376133  12.740237 -7.041879  3.0585732019-10-03 11:14:32.969237  3.536676  13.233300 -6.775922  3.5624602019-10-04 11:14:32.969237  5.075667  13.630937 -6.409229  3.4046472019-10-05 11:14:32.969237  4.633807  14.011680 -7.359063  2.5550632019-10-06 11:14:32.969237  4.108268  14.233577 -8.319235  1.7822572019-10-07 11:14:32.969237  5.389960  15.049002 -7.592306  3.0649962019-10-08 11:14:32.969237  4.904890  15.129739 -7.845749  2.1970242019-10-09 11:14:32.969237  2.894357  14.053121 -7.560088  2.1273222019-10-10 11:14:32.969237  2.432563  13.678098 -7.010267  2.5360352019-10-11 11:14:32.969237  1.493160  13.263020 -7.262265  2.9546922019-10-12 11:14:32.969237  2.477873  14.443603 -7.815188  2.4203562019-10-13 11:14:32.969237  1.914146  14.476938 -6.850849  2.9853172019-10-14 11:14:32.969237  1.944343  13.532021 -7.611172  4.7549202019-10-15 11:14:32.969237  2.379594  13.908116 -8.503684  5.2173892019-10-16 11:14:32.969237  1.479926  13.646017 -7.861792  4.7698452019-10-17 11:14:32.969237  3.376088  12.470308 -7.902426  4.7357792019-10-18 11:14:32.969237  3.847433  12.177020 -6.719579  3.1234752019-10-19 11:14:32.969237  3.904511  12.261467 -6.016796  3.4193902019-10-20 11:14:32.969237  3.188237  14.305071 -6.137896  2.9058132019-10-21 11:14:32.969237  4.006034  13.981431 -6.034235  
2.4833232019-10-22 11:14:32.969237  4.187015  14.311562 -6.466325  0.5316752019-10-23 11:14:32.969237  4.928834  14.064165 -6.435447 -0.506871[100 rows x 4 columns]

在这里插入图片描述

# 99. DataFrame bar chart with a line overlay on a secondary y-axis.
df = pd.DataFrame({
    "revenue": [57, 68, 63, 71, 72, 90, 80, 62, 59, 51, 47, 52],
    "advertising": [2.1, 1.9, 2.7, 3.0, 3.6, 3.2, 2.7, 2.4, 1.8, 1.6, 1.3, 1.9],
    "month": range(12),
})
# Monthly revenue as yellow bars; advertising is drawn as a line on the same
# axes (ax=ax) but scaled against its own secondary y-axis (secondary_y=True).
ax = df.plot.bar('month', 'revenue', color='yellow')
df.plot('month', 'advertising', secondary_y=True, ax=ax)

在这里插入图片描述

转载地址:http://xzhgn.baihongyu.com/

你可能感兴趣的文章
C 求最小公倍数算法
查看>>
Ubuntu16.04升级 Ubuntu18.04
查看>>
开发板挂载 ubuntu18.04系统下的 nfs根文件系统失败
查看>>
cmake 判断操作系统平台
查看>>
VIM 编码格式 显示乱码
查看>>
Uboot Makefile 中 $(origin variable)详解
查看>>
BMP 文件格式的详解
查看>>
9针串口引脚定义
查看>>
QT4 QWebView的使用
查看>>
QT QWebView/QWebEngineView使用
查看>>
ARM Linux Kernel 编译结果 Image zImage uImage映像的区别
查看>>
SVN 删除用户名和密码
查看>>
EXPORT_SYMBOL() 错误--warning: type defaults to 'int' in declaration of 'EXPORT_SYMBOL'
查看>>
Qt 使用 QSettings 读写ini文件
查看>>
Uboot LCD 添加进度条功能
查看>>
Git diff 使用 vimdiff 对比差异
查看>>
使用debugfs来调试内核
查看>>
Qt4 程序 QWS 启动参数详解
查看>>
QT 支持鼠标和触摸屏输入
查看>>
svn diff 使用 vimdiff 对比差异
查看>>