from pandas import Series,DataFrame
import pandas as pd
In [4]:
obj = Series([1, -2, 3, -4])
obj
Out[4]:
0 1
1 -2
2 3
3 -4
dtype: int64
In [5]:
obj2 = Series([1, -2, 3, -4], index=['a', 'b', 'c', 'd'])
obj2
Out[5]:
a 1
b -2
c 3
d -4
dtype: int64
In [6]:
obj2.values
Out[6]:
array([ 1, -2, 3, -4], dtype=int64)
In [7]:
obj2.index
Out[7]:
Index(['a', 'b', 'c', 'd'], dtype='object')
In [8]:
obj2['b']
Out[8]:
-2
In [10]:
obj2['c'] = 23
obj2[['c', 'd']]
Out[10]:
c 23
d -4
dtype: int64
In [11]:
obj2
Out[11]:
a 1
b -2
c 23
d -4
dtype: int64
In [12]:
obj2[obj2 < 0 ]
Out[12]:
b -2
d -4
dtype: int64
In [13]:
obj2 * 2
Out[13]:
a 2
b -4
c 46
d -8
dtype: int64
In [16]:
import numpy as np
In [18]:
np.abs(obj2)
Out[18]:
a 1
b 2
c 23
d 4
dtype: int64
In [20]:
data = {
'张三':92,
'李四':78,
'王五':68,
'小明':82
}
In [21]:
obj3 = Series(data)
obj3
Out[21]:
小明 82
张三 92
李四 78
王五 68
dtype: int64
In [22]:
names = ['张三', '李四', '王五', '小明']
obj4 = Series(data, index=names)
obj4
Out[22]:
张三 92
李四 78
王五 68
小明 82
dtype: int64
In [23]:
obj4.name = 'math'
obj4.index.name = 'students'
In [24]:
obj4
Out[24]:
students
张三 92
李四 78
王五 68
小明 82
Name: math, dtype: int64
In [1]:
import numpy as np
from pandas import Series,DataFrame
import pandas as pd
In [2]:
data = {
'name':['张三', '李四', '王五', '小明'],
'sex':['female', 'female', 'male', 'male'],
'year':[2001, 2001, 2003, 2002],
'city':['北京', '上海', '广州', '北京']
}
df = DataFrame(data)
df
Out[2]:
city | name | sex | year | |
0 | 北京 | 张三 | female | 2001 |
1 | 上海 | 李四 | female | 2001 |
2 | 广州 | 王五 | male | 2003 |
3 | 北京 | 小明 | male | 2002 |
In [3]:
df = DataFrame(data, columns=['name', 'sex', 'year', 'city'])
df
Out[3]:
name | sex | year | city | |
0 | 张三 | female | 2001 | 北京 |
1 | 李四 | female | 2001 | 上海 |
2 | 王五 | male | 2003 | 广州 |
3 | 小明 | male | 2002 | 北京 |
In [4]:
df = DataFrame(data, columns=['name', 'sex', 'year', 'city'],index=['a', 'b', 'c', 'd'])
df
Out[4]:
name | sex | year | city | |
a | 张三 | female | 2001 | 北京 |
b | 李四 | female | 2001 | 上海 |
c | 王五 | male | 2003 | 广州 |
d | 小明 | male | 2002 | 北京 |
In [5]:
df.index
Out[5]:
Index(['a', 'b', 'c', 'd'], dtype='object')
In [6]:
df.columns
Out[6]:
Index(['name', 'sex', 'year', 'city'], dtype='object')
In [7]:
data2 = {
'sex':{'张三':'female','李四':'female','王五':'male'},
'city':{'张三':'北京','李四':'上海','王五':'广州'}
}
df2 = DataFrame(data2)
df2
Out[7]:
city | sex | |
张三 | 北京 | female |
李四 | 上海 | female |
王五 | 广州 | male |
In [8]:
df.index.name = 'id'
df.columns.name = 'std_info'
In [9]:
df
Out[9]:
std_info | name | sex | year | city |
id | ||||
a | 张三 | female | 2001 | 北京 |
b | 李四 | female | 2001 | 上海 |
c | 王五 | male | 2003 | 广州 |
d | 小明 | male | 2002 | 北京 |
In [10]:
obj = Series([1, -2, 3, -4], index=['a', 'b', 'c', 'd'])
obj
Out[10]:
a 1
b -2
c 3
d -4
dtype: int64
In [11]:
obj.index
Out[11]:
Index(['a', 'b', 'c', 'd'], dtype='object')
In [12]:
df.index
Out[12]:
Index(['a', 'b', 'c', 'd'], dtype='object', name='id')
In [13]:
df.columns
Out[13]:
Index(['name', 'sex', 'year', 'city'], dtype='object', name='std_info')
In [14]:
index = obj.index
index[1] = 'f'
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
in ()
1 index = obj.index
----> 2 index[1] = 'f'
F:Anacondaenvsdata-analysislibsite-packagespandascoreindexesbase.py in __setitem__(self, key, value)
1668
1669 def __setitem__(self, key, value):
-> 1670 raise TypeError("Index does not support mutable operations")
1671
1672 def __getitem__(self, key):
TypeError: Index does not support mutable operations
In [15]:
df
Out[15]:
std_info | name | sex | year | city |
id | ||||
a | 张三 | female | 2001 | 北京 |
b | 李四 | female | 2001 | 上海 |
c | 王五 | male | 2003 | 广州 |
d | 小明 | male | 2002 | 北京 |
In [16]:
'sex' in df.columns
Out[16]:
True
In [17]:
'f' in df.index
Out[17]:
False
In [20]:
obj = Series([1, -2, 3, -4], index=['b', 'a', 'c', 'd'])
obj
Out[20]:
b 1
a -2
c 3
d -4
dtype: int64
In [21]:
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
obj2
Out[21]:
a -2.0
b 1.0
c 3.0
d -4.0
e NaN
dtype: float64
In [27]:
obj = Series([1, -2, 3, -4], index=[0,2,3,5])
obj
Out[27]:
0 1
2 -2
3 3
5 -4
dtype: int64
In [28]:
obj2 = obj.reindex(range(6),method='ffill')
obj2
Out[28]:
0 1
1 1
2 -2
3 3
4 3
5 -4
dtype: int64
In [29]:
df = DataFrame(np.arange(9).reshape(3,3),index=['a','c','d'],columns=['name','id','sex'])
df
Out[29]:
name | id | sex | |
a | 0 | 1 | 2 |
c | 3 | 4 | 5 |
d | 6 | 7 | 8 |
In [30]:
df2 = df.reindex(['a', 'b', 'c', 'd'])
df2
Out[30]:
name | id | sex | |
a | 0.0 | 1.0 | 2.0 |
b | NaN | NaN | NaN |
c | 3.0 | 4.0 | 5.0 |
d | 6.0 | 7.0 | 8.0 |
In [31]:
df3 = df.reindex(columns=['name', 'year', 'id'], fill_value=0)
df3
Out[31]:
name | year | id | |
a | 0 | 0 | 1 |
c | 3 | 0 | 4 |
d | 6 | 0 | 7 |
In [49]:
data = {
'name':['张三', '李四', '王五', '小明'],
'grade':[68, 78, 63, 92]
}
df = DataFrame(data)
df
Out[49]:
grade | name | |
0 | 68 | 张三 |
1 | 78 | 李四 |
2 | 63 | 王五 |
3 | 92 | 小明 |
In [50]:
df2 = df.sort_values(by='grade')
df2
Out[50]:
grade | name | |
2 | 63 | 王五 |
0 | 68 | 张三 |
1 | 78 | 李四 |
3 | 92 | 小明 |
In [51]:
df3 = df2.reset_index()
df3
Out[51]:
index | grade | name | |
0 | 2 | 63 | 王五 |
1 | 0 | 68 | 张三 |
2 | 1 | 78 | 李四 |
3 | 3 | 92 | 小明 |
In [52]:
df4 = df2.reset_index(drop=True)
df4
Out[52]:
grade | name | |
0 | 63 | 王五 |
1 | 68 | 张三 |
2 | 78 | 李四 |
3 | 92 | 小明 |
In [45]:
data = {
'name':['张三', '李四', '王五', '小明'],
'sex':['female', 'female', 'male', 'male'],
'year':[2001, 2001, 2003, 2002],
'city':['北京', '上海', '广州', '北京']
}
df = DataFrame(data)
df
Out[45]:
city | name | sex | year | |
0 | 北京 | 张三 | female | 2001 |
1 | 上海 | 李四 | female | 2001 |
2 | 广州 | 王五 | male | 2003 |
3 | 北京 | 小明 | male | 2002 |
In [47]:
df2 = df.set_index('name')
df2
Out[47]:
city | sex | year | |
name | |||
张三 | 北京 | female | 2001 |
李四 | 上海 | female | 2001 |
王五 | 广州 | male | 2003 |
小明 | 北京 | male | 2002 |
In [48]:
df3 = df2.reset_index()
df3
Out[48]:
name | city | sex | year | |
0 | 张三 | 北京 | female | 2001 |
1 | 李四 | 上海 | female | 2001 |
2 | 王五 | 广州 | male | 2003 |
3 | 小明 | 北京 | male | 2002 |
In [1]:
import numpy as np
from pandas import Series,DataFrame
import pandas as pd
In [3]:
obj = Series([1, -2, 3, -4], index=['a', 'b', 'c', 'd'])
obj
Out[3]:
a 1
b -2
c 3
d -4
dtype: int64
In [4]:
# Positional access on a label-indexed Series: use .iloc so the integer is
# always treated as a position, never as a label.
obj.iloc[1]
Out[4]:
-2
In [5]:
obj['b']
Out[5]:
-2
In [6]:
obj[['a','c']]
Out[6]:
a 1
c 3
dtype: int64
In [7]:
obj[0:2]
Out[7]:
a 1
b -2
dtype: int64
In [8]:
obj['a':'c']
Out[8]:
a 1
b -2
c 3
dtype: int64
In [53]:
data = {
'name':['张三', '李四', '王五', '小明'],
'sex':['female', 'female', 'male', 'male'],
'year':[2001, 2001, 2003, 2002],
'city':['北京', '上海', '广州', '北京']
}
df = DataFrame(data)
df
Out[53]:
city | name | sex | year | |
0 | 北京 | 张三 | female | 2001 |
1 | 上海 | 李四 | female | 2001 |
2 | 广州 | 王五 | male | 2003 |
3 | 北京 | 小明 | male | 2002 |
In [17]:
df['city']
Out[17]:
0 北京
1 上海
2 广州
3 北京
Name: city, dtype: object
In [18]:
df.name
Out[18]:
0 张三
1 李四
2 王五
3 小明
Name: name, dtype: object
In [20]:
df[['city','sex']]
Out[20]:
city | sex | |
0 | 北京 | female |
1 | 上海 | female |
2 | 广州 | male |
3 | 北京 | male |
In [26]:
df2 = df.set_index('name')
df2
Out[26]:
city | sex | year | |
name | |||
张三 | 北京 | female | 2001 |
李四 | 上海 | female | 2001 |
王五 | 广州 | male | 2003 |
小明 | 北京 | male | 2002 |
In [27]:
df2[0:2]
Out[27]:
city | sex | year | |
name | |||
张三 | 北京 | female | 2001 |
李四 | 上海 | female | 2001 |
In [28]:
df2['李四':'王五']
Out[28]:
city | sex | year | |
name | |||
李四 | 上海 | female | 2001 |
王五 | 广州 | male | 2003 |
In [29]:
df2
Out[29]:
city | sex | year | |
name | |||
张三 | 北京 | female | 2001 |
李四 | 上海 | female | 2001 |
王五 | 广州 | male | 2003 |
小明 | 北京 | male | 2002 |
In [31]:
df2.loc['张三']
Out[31]:
city 北京
sex female
year 2001
Name: 张三, dtype: object
In [33]:
df2.loc[['张三','王五']]
Out[33]:
city | sex | year | |
name | |||
张三 | 北京 | female | 2001 |
王五 | 广州 | male | 2003 |
In [35]:
df2.iloc[1]
Out[35]:
city 上海
sex female
year 2001
Name: 李四, dtype: object
In [36]:
df2.iloc[[1,3]]
Out[36]:
city | sex | year | |
name | |||
李四 | 上海 | female | 2001 |
小明 | 北京 | male | 2002 |
In [41]:
# .ix was removed from pandas; select rows by label with .loc and turn the
# positional column slice 0:2 into labels via df2.columns.
df2.loc[['张三','王五'], df2.columns[0:2]]
Out[41]:
city | sex | |
name | ||
张三 | 北京 | female |
王五 | 广州 | male |
In [75]:
pd.set_option('mode.chained_assignment',None)
In [43]:
df2.loc[:,['sex','year']] # select columns by label (.ix was removed from pandas)
Out[43]:
sex | year | |
name | ||
张三 | female | 2001 |
李四 | female | 2001 |
王五 | male | 2003 |
小明 | male | 2002 |
In [44]:
df2.iloc[[1,3],:] # select rows by integer position (.ix was removed from pandas)
Out[44]:
city | sex | year | |
name | |||
李四 | 上海 | female | 2001 |
小明 | 北京 | male | 2002 |
In [45]:
df2['sex'] == 'female'
Out[45]:
name
张三 True
李四 True
王五 False
小明 False
Name: sex, dtype: bool
In [46]:
df2[df2['sex'] == 'female']
Out[46]:
city | sex | year | |
name | |||
张三 | 北京 | female | 2001 |
李四 | 上海 | female | 2001 |
In [48]:
df2[(df2['sex'] == 'female') & (df2['city'] == '北京')]
Out[48]:
city | sex | year | |
name | |||
张三 | 北京 | female | 2001 |
In [54]:
df
Out[54]:
city | name | sex | year | |
0 | 北京 | 张三 | female | 2001 |
1 | 上海 | 李四 | female | 2001 |
2 | 广州 | 王五 | male | 2003 |
3 | 北京 | 小明 | male | 2002 |
In [57]:
new_data = {
'city':'武汉',
'name':'小李',
'sex':'male',
'year':2002
}
In [59]:
# DataFrame.append was removed in pandas 2.0; wrap the dict in a one-row
# DataFrame and concatenate. ignore_index=True renumbers the rows 0..n-1.
df = pd.concat([df, DataFrame([new_data])], ignore_index=True)
df
Out[59]:
city | name | sex | year | |
0 | 北京 | 张三 | female | 2001 |
1 | 上海 | 李四 | female | 2001 |
2 | 广州 | 王五 | male | 2003 |
3 | 北京 | 小明 | male | 2002 |
4 | 武汉 | 小李 | male | 2002 |
In [60]:
df['class'] = 2018
df
Out[60]:
city | name | sex | year | class | |
0 | 北京 | 张三 | female | 2001 | 2018 |
1 | 上海 | 李四 | female | 2001 | 2018 |
2 | 广州 | 王五 | male | 2003 | 2018 |
3 | 北京 | 小明 | male | 2002 | 2018 |
4 | 武汉 | 小李 | male | 2002 | 2018 |
In [61]:
df['math'] = [92,78,58,69,82]
df
Out[61]:
city | name | sex | year | class | math | |
0 | 北京 | 张三 | female | 2001 | 2018 | 92 |
1 | 上海 | 李四 | female | 2001 | 2018 | 78 |
2 | 广州 | 王五 | male | 2003 | 2018 | 58 |
3 | 北京 | 小明 | male | 2002 | 2018 | 69 |
4 | 武汉 | 小李 | male | 2002 | 2018 | 82 |
In [63]:
new_df = df.drop(2) # drop the row whose index label is 2
new_df
Out[63]:
city | name | sex | year | class | math | |
0 | 北京 | 张三 | female | 2001 | 2018 | 92 |
1 | 上海 | 李四 | female | 2001 | 2018 | 78 |
3 | 北京 | 小明 | male | 2002 | 2018 | 69 |
4 | 武汉 | 小李 | male | 2002 | 2018 | 82 |
In [64]:
new_df = new_df.drop('class',axis=1) # drop the 'class' column
new_df
Out[64]:
city | name | sex | year | math | |
0 | 北京 | 张三 | female | 2001 | 92 |
1 | 上海 | 李四 | female | 2001 | 78 |
3 | 北京 | 小明 | male | 2002 | 69 |
4 | 武汉 | 小李 | male | 2002 | 82 |
In [65]:
new_df.rename(index={3:2,4:3},columns={'math':'Math'},inplace=True) # inplace=True modifies new_df itself instead of returning a new frame
new_df
Out[65]:
city | name | sex | year | Math | |
0 | 北京 | 张三 | female | 2001 | 92 |
1 | 上海 | 李四 | female | 2001 | 78 |
2 | 北京 | 小明 | male | 2002 | 69 |
3 | 武汉 | 小李 | male | 2002 | 82 |
In [67]:
obj1 = Series([3.2,5.3,-4.4,-3.7],index=['a','c','g','f'])
obj1
Out[67]:
a 3.2
c 5.3
g -4.4
f -3.7
dtype: float64
In [68]:
obj2 = Series([5.0,-2,4.4,3.4],index=['a','b','c','d'])
obj2
Out[68]:
a 5.0
b -2.0
c 4.4
d 3.4
dtype: float64
In [69]:
obj1 + obj2
Out[69]:
a 8.2
b NaN
c 9.7
d NaN
f NaN
g NaN
dtype: float64
In [70]:
df1 = DataFrame(np.arange(9).reshape(3,3),columns=['a','b','c'], index=['apple','tea','banana'])
df1
Out[70]:
a | b | c | |
apple | 0 | 1 | 2 |
tea | 3 | 4 | 5 |
banana | 6 | 7 | 8 |
In [71]:
df2 = DataFrame(np.arange(9).reshape(3,3),columns=['a','b','d'], index=['apple','tea','coco'])
df2
Out[71]:
a | b | d | |
apple | 0 | 1 | 2 |
tea | 3 | 4 | 5 |
coco | 6 | 7 | 8 |
In [72]:
df1 + df2
Out[72]:
a | b | c | d | |
apple | 0.0 | 2.0 | NaN | NaN |
banana | NaN | NaN | NaN | NaN |
coco | NaN | NaN | NaN | NaN |
tea | 6.0 | 8.0 | NaN | NaN |
In [73]:
df1
Out[73]:
a | b | c | |
apple | 0 | 1 | 2 |
tea | 3 | 4 | 5 |
banana | 6 | 7 | 8 |
In [76]:
# .ix was removed from pandas; 'apple' is a row label, so use .loc.
s = df1.loc['apple']
s
Out[76]:
a 0
b 1
c 2
Name: apple, dtype: int32
In [77]:
df1 - s
Out[77]:
a | b | c | |
apple | 0 | 0 | 0 |
tea | 3 | 3 | 3 |
banana | 6 | 6 | 6 |
In [78]:
data = {
'fruit':['apple', 'orange', 'grape', 'banana'],
'price':['25元', '42元', '35元', '14元']
}
df1 = DataFrame(data)
df1
Out[78]:
fruit | price | |
0 | apple | 25元 |
1 | orange | 42元 |
2 | grape | 35元 |
3 | banana | 14元 |
In [79]:
def f(x):
    """Return the part of the string ``x`` that precedes the first '元'.

    If ``x`` contains no '元', the whole string is returned unchanged.
    """
    return x.split('元')[0]
df1['price'] = df1['price'].map(f)
df1
Out[79]:
fruit | price | |
0 | apple | 25 |
1 | orange | 42 |
2 | grape | 35 |
3 | banana | 14 |
In [80]:
df2 = DataFrame(np.random.randn(3,3),columns=['a','b','c'],index=['app','win','mac'])
df2
Out[80]:
a | b | c | |
app | 1.507962 | -2.140018 | 0.053571 |
win | 0.729671 | 0.207060 | 0.397773 |
mac | -0.191497 | -0.765726 | -0.266327 |
In [81]:
def f(x):
    """Return the spread (maximum minus minimum) of ``x``."""
    return x.max() - x.min()
df2.apply(f)
Out[81]:
a 1.699460
b 2.347079
c 0.664100
dtype: float64
In [82]:
df2
Out[82]:
a | b | c | |
app | 1.507962 | -2.140018 | 0.053571 |
win | 0.729671 | 0.207060 | 0.397773 |
mac | -0.191497 | -0.765726 | -0.266327 |
In [84]:
df2.applymap(lambda x:'%.2f'%x)
Out[84]:
a | b | c | |
app | 1.51 | -2.14 | 0.05 |
win | 0.73 | 0.21 | 0.40 |
mac | -0.19 | -0.77 | -0.27 |
In [86]:
obj1 = Series([-2,3,2,1],index=['b','a','d','c'])
obj1
Out[86]:
b -2
a 3
d 2
c 1
dtype: int64
In [87]:
obj1.sort_index() # ascending order
Out[87]:
a 3
b -2
c 1
d 2
dtype: int64
In [88]:
obj1.sort_index(ascending=False) # descending order
Out[88]:
d 2
c 1
b -2
a 3
dtype: int64
In [91]:
obj1.sort_values()
Out[91]:
b -2
c 1
d 2
a 3
dtype: int64
In [92]:
df2
Out[92]:
a | b | c | |
app | 1.507962 | -2.140018 | 0.053571 |
win | 0.729671 | 0.207060 | 0.397773 |
mac | -0.191497 | -0.765726 | -0.266327 |
In [93]:
df2.sort_values(by='b')
Out[93]:
a | b | c | |
app | 1.507962 | -2.140018 | 0.053571 |
mac | -0.191497 | -0.765726 | -0.266327 |
win | 0.729671 | 0.207060 | 0.397773 |
In [2]:
df = DataFrame(np.random.randn(9).reshape(3,3),columns=['a','b','c'])
df
Out[2]:
a | b | c | |
0 | 0.660215 | -1.137716 | -0.302954 |
1 | 1.496589 | -0.768645 | -2.091506 |
2 | 0.170316 | -2.682284 | -0.041099 |
In [3]:
df.sum()
Out[3]:
a 2.327120
b -4.588645
c -2.435558
dtype: float64
In [4]:
df.sum(axis=1)
Out[4]:
0 -0.780455
1 -1.363562
2 -2.553067
dtype: float64
In [5]:
data = {
'name':['张三', '李四', '王五', '小明'],
'sex':['female', 'female', 'male', 'male'],
'math':[78, 79, 83, 92],
'city':['北京', '上海', '广州', '北京']
}
df = DataFrame(data)
df
Out[5]:
city | math | name | sex | |
0 | 北京 | 78 | 张三 | female |
1 | 上海 | 79 | 李四 | female |
2 | 广州 | 83 | 王五 | male |
3 | 北京 | 92 | 小明 | male |
In [6]:
df.describe()
Out[6]:
math | |
count | 4.000000 |
mean | 83.000000 |
std | 6.377042 |
min | 78.000000 |
25% | 78.750000 |
50% | 81.000000 |
75% | 85.250000 |
max | 92.000000 |
In [7]:
obj = Series(['a','b','a','c','b'])
obj
Out[7]:
0 a
1 b
2 a
3 c
4 b
dtype: object
In [8]:
obj.unique()
Out[8]:
array(['a', 'b', 'c'], dtype=object)
In [9]:
obj.value_counts()
Out[9]:
a 2
b 2
c 1
dtype: int64
In [11]:
obj = Series(np.random.randn(9),
index=[['one','one','one','two','two','two','three','three','three'],
['a','b','c','a','b','c','a','b','c']])
obj
Out[11]:
one a 0.697195
b -0.887408
c 0.451851
two a 0.390779
b -2.058070
c 0.760594
three a -0.305534
b -0.720491
c -0.259225
dtype: float64
In [12]:
obj.index
Out[12]:
MultiIndex(levels=[['one', 'three', 'two'], ['a', 'b', 'c']],
labels=[[0, 0, 0, 2, 2, 2, 1, 1, 1], [0, 1, 2, 0, 1, 2, 0, 1, 2]])
In [13]:
obj['two']
Out[13]:
a 0.390779
b -2.058070
c 0.760594
dtype: float64
In [15]:
obj[:,'a'] # select by the inner index level
Out[15]:
one 0.697195
two 0.390779
three -0.305534
dtype: float64
In [16]:
df = DataFrame(np.arange(16).reshape(4,4),
index=[['one','one','two','two'],['a','b','a','b']],
columns=[['apple','apple','orange','orange'],['red','green','red','green']])
df
Out[16]:
apple | orange | ||||
red | green | red | green | ||
one | a | 0 | 1 | 2 | 3 |
b | 4 | 5 | 6 | 7 | |
two | a | 8 | 9 | 10 | 11 |
b | 12 | 13 | 14 | 15 |
In [17]:
df['apple']
Out[17]:
red | green | ||
one | a | 0 | 1 |
b | 4 | 5 | |
two | a | 8 | 9 |
b | 12 | 13 |
In [18]:
df.swaplevel(0,1)
Out[18]:
apple | orange | ||||
red | green | red | green | ||
a | one | 0 | 1 | 2 | 3 |
b | one | 4 | 5 | 6 | 7 |
a | two | 8 | 9 | 10 | 11 |
b | two | 12 | 13 | 14 | 15 |
In [19]:
# sum(level=...) was removed in pandas 2.0; group on the outer row-index level instead.
df.groupby(level=0).sum()
Out[19]:
apple | orange | |||
red | green | red | green | |
one | 4 | 6 | 8 | 10 |
two | 20 | 22 | 24 | 26 |
In [20]:
# sum(level=..., axis=1) was removed in pandas 2.0; transpose, group on the
# inner column level, sum, and transpose back.
df.T.groupby(level=1).sum().T
Out[20]:
green | red | ||
one | a | 4 | 2 |
b | 12 | 10 | |
two | a | 20 | 18 |
b | 28 | 26 |
In [6]:
import numpy as np
from pandas import Series,DataFrame
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt #导入matplotlib库
%matplotlib inline #魔法函数
In [7]:
s = Series(np.random.normal(size=10))
s
Out[7]:
0 -0.468142
1 -1.408927
2 -0.182548
3 -0.043023
4 0.121437
5 0.539194
6 0.011423
7 -0.938207
8 1.589460
9 0.460753
dtype: float64
In [8]:
s.plot()
Out[8]:
In [10]:
df = DataFrame({'normal': np.random.normal(size=100),
'gamma': np.random.gamma(1, size=100),
'poisson': np.random.poisson(size=100)})
df.cumsum()
Out[10]:
gamma | normal | poisson | |
0 | 1.804045 | 1.788000 | 0.0 |
1 | 1.835715 | 0.089426 | 0.0 |
2 | 3.850210 | 0.870177 | 0.0 |
3 | 6.082898 | 0.902761 | 0.0 |
4 | 8.837446 | 0.959945 | 1.0 |
5 | 9.307126 | 1.658268 | 3.0 |
6 | 9.518029 | 3.118419 | 6.0 |
7 | 9.758011 | 3.861418 | 6.0 |
8 | 10.481856 | 3.405625 | 6.0 |
9 | 12.405202 | 4.892910 | 7.0 |
10 | 13.086167 | 4.776206 | 7.0 |
11 | 13.457807 | 3.217277 | 8.0 |
12 | 13.574663 | 1.821368 | 9.0 |
13 | 13.695523 | 2.829581 | 10.0 |
14 | 13.819044 | 3.015490 | 11.0 |
15 | 15.801080 | 2.629254 | 13.0 |
16 | 17.043867 | 2.052196 | 14.0 |
17 | 17.089774 | 3.687834 | 15.0 |
18 | 17.499338 | 2.635491 | 16.0 |
19 | 18.257891 | 2.636466 | 18.0 |
20 | 19.101743 | 2.272298 | 19.0 |
21 | 24.158020 | -0.113947 | 20.0 |
22 | 25.112218 | -0.594266 | 23.0 |
23 | 25.986628 | -1.326405 | 23.0 |
24 | 28.383365 | -1.349211 | 23.0 |
25 | 28.753694 | -1.527589 | 23.0 |
26 | 28.908734 | -1.312111 | 25.0 |
27 | 30.607696 | 0.228251 | 26.0 |
28 | 31.081009 | 1.067429 | 27.0 |
29 | 31.330353 | 1.098605 | 28.0 |
... | ... | ... | ... |
70 | 72.302929 | 14.123995 | 66.0 |
71 | 72.794689 | 14.860449 | 67.0 |
72 | 73.629651 | 14.828726 | 67.0 |
73 | 74.610837 | 14.168664 | 68.0 |
74 | 78.773897 | 13.334949 | 70.0 |
75 | 80.916582 | 13.722037 | 71.0 |
76 | 81.994526 | 14.717187 | 72.0 |
77 | 83.927355 | 13.784763 | 72.0 |
78 | 86.004903 | 13.343261 | 75.0 |
79 | 86.609627 | 12.151334 | 75.0 |
80 | 87.199249 | 13.345584 | 77.0 |
81 | 87.213180 | 12.311815 | 77.0 |
82 | 87.553190 | 13.864232 | 77.0 |
83 | 89.157662 | 14.439016 | 78.0 |
84 | 89.213456 | 14.401503 | 80.0 |
85 | 89.471336 | 15.838362 | 81.0 |
86 | 89.552332 | 14.406933 | 81.0 |
87 | 91.565291 | 14.520602 | 82.0 |
88 | 94.179919 | 12.017739 | 82.0 |
89 | 95.075841 | 13.279973 | 83.0 |
90 | 95.192719 | 13.089789 | 83.0 |
91 | 96.148316 | 12.268122 | 84.0 |
92 | 97.146898 | 11.830559 | 84.0 |
93 | 97.456375 | 13.035484 | 86.0 |
94 | 99.877122 | 11.966609 | 87.0 |
95 | 103.015620 | 12.313341 | 88.0 |
96 | 103.116648 | 12.715195 | 88.0 |
97 | 103.490265 | 12.168645 | 89.0 |
98 | 103.925893 | 11.502630 | 89.0 |
99 | 105.008619 | 11.193637 | 89.0 |
100 rows × 3 columns
In [11]:
df.cumsum().plot()
Out[11]:
In [12]:
data = {
'name':['张三', '李四', '王五', '小明', 'Peter'],
'sex':['female', 'female', 'male', 'male','male'],
'year':[2001, 2001, 2003, 2002, 2002],
'city':['北京', '上海', '广州', '北京', '北京']
}
df = DataFrame(data)
df
Out[12]:
city | name | sex | year | |
0 | 北京 | 张三 | female | 2001 |
1 | 上海 | 李四 | female | 2001 |
2 | 广州 | 王五 | male | 2003 |
3 | 北京 | 小明 | male | 2002 |
4 | 北京 | Peter | male | 2002 |
In [14]:
df['sex'].value_counts()
Out[14]:
male 3
female 2
Name: sex, dtype: int64
In [16]:
df['sex'].value_counts().plot(kind='bar')
Out[16]:
In [18]:
df2 = DataFrame(np.random.randint(0,100,size=(3,3)),
index=('one','two','three'),
columns = ['A','B','C'])
df2
Out[18]:
A | B | C | |
one | 29 | 5 | 88 |
two | 35 | 42 | 43 |
three | 87 | 85 | 76 |
In [19]:
df2.plot(kind='barh')
Out[19]:
In [20]:
df2.plot(kind='barh',stacked=True,alpha=0.5)
Out[20]:
In [28]:
s = Series(np.random.normal(size=100))
s.hist(bins=20,grid=False)
Out[28]:
In [29]:
s.plot(kind='kde')
Out[29]:
In [31]:
df3 = DataFrame(np.arange(10),columns=['X'])
df3['Y'] = 2 * df3['X'] + 5
df3
Out[31]:
X | Y | |
0 | 0 | 5 |
1 | 1 | 7 |
2 | 2 | 9 |
3 | 3 | 11 |
4 | 4 | 13 |
5 | 5 | 15 |
6 | 6 | 17 |
7 | 7 | 19 |
8 | 8 | 21 |
9 | 9 | 23 |
In [34]:
df3.plot(kind='scatter',x='X',y='Y')
Out[34]:
In [51]:
import numpy as np
from pandas import Series,DataFrame
import pandas as pd
import seaborn as sns #导入seaborn库
In [52]:
tips=sns.load_dataset('tips')
tips.head()
Out[52]:
total_bill | tip | sex | smoker | day | time | size | |
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
In [54]:
tips.shape
Out[54]:
(244, 7)
In [55]:
tips.describe()
Out[55]:
total_bill | tip | size | |
count | 244.000000 | 244.000000 | 244.000000 |
mean | 19.785943 | 2.998279 | 2.569672 |
std | 8.902412 | 1.383638 | 0.951100 |
min | 3.070000 | 1.000000 | 1.000000 |
25% | 13.347500 | 2.000000 | 2.000000 |
50% | 17.795000 | 2.900000 | 2.000000 |
75% | 24.127500 | 3.562500 | 3.000000 |
max | 50.810000 | 10.000000 | 6.000000 |
In [56]:
tips.info()
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
total_bill 244 non-null float64
tip 244 non-null float64
sex 244 non-null category
smoker 244 non-null category
day 244 non-null category
time 244 non-null category
size 244 non-null int64
dtypes: category(4), float64(2), int64(1)
memory usage: 7.2 KB
In [57]:
tips.plot(kind='scatter',x='total_bill',y='tip')
Out[57]:
In [62]:
male_tip = tips[tips['sex'] == 'Male']['tip'].mean()
male_tip
Out[62]:
3.0896178343949052
In [63]:
female_tip = tips[tips['sex'] == 'Female']['tip'].mean()
female_tip
Out[63]:
2.833448275862069
In [66]:
s = Series([male_tip,female_tip],index=['male','female'])
s
Out[66]:
male 3.089618
female 2.833448
dtype: float64
In [67]:
s.plot(kind='bar')
Out[67]:
In [68]:
tips['day'].unique()
Out[68]:
[Sun, Sat, Thur, Fri]
Categories (4, object): [Sun, Sat, Thur, Fri]
In [71]:
sun_tip = tips[tips['day'] == 'Sun']['tip'].mean()
sat_tip = tips[tips['day'] == 'Sat']['tip'].mean()
thur_tip = tips[tips['day'] == 'Thur']['tip'].mean()
fri_tip = tips[tips['day'] == 'Fri']['tip'].mean()
In [72]:
s = Series([thur_tip,fri_tip,sat_tip,sun_tip],index=['Thur','Fri','Sat','Sun'])
s
Out[72]:
Thur 2.771452
Fri 2.734737
Sat 2.993103
Sun 3.255132
dtype: float64
In [73]:
s.plot(kind='bar')
Out[73]:
In [74]:
tips['percent_tip'] = tips['tip']/(tips['total_bill']+tips['tip'])
tips.head(10)
Out[74]:
total_bill | tip | sex | smoker | day | time | size | percent_tip | |
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | 0.056111 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | 0.138333 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 | 0.142799 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 | 0.122638 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 | 0.128014 |
5 | 25.29 | 4.71 | Male | No | Sun | Dinner | 4 | 0.157000 |
6 | 8.77 | 2.00 | Male | No | Sun | Dinner | 2 | 0.185701 |
7 | 26.88 | 3.12 | Male | No | Sun | Dinner | 4 | 0.104000 |
8 | 15.04 | 1.96 | Male | No | Sun | Dinner | 2 | 0.115294 |
9 | 14.78 | 3.23 | Male | No | Sun | Dinner | 2 | 0.179345 |
In [76]:
tips['percent_tip'].hist(bins=50)
Out[76]:
页面更新:2024-04-19
本站资料均由网友自行发布提供,仅用于学习交流。如有版权问题,请与我联系,QQ:4156828
© CopyRight 2020-2024 All Rights Reserved. Powered By 71396.com 闽ICP备11008920号-4
闽公网安备35020302034903号