2020年3月2日
2.3.10 pandas 1
import numpy as np
import pandas as pd
%matplotlib inline
pd.__version__
'1.0.1'
2.3.1 Object creation
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s
0 1.0
1 3.0
2 5.0
3 NaN
4 6.0
5 8.0
dtype: float64
dates = pd.date_range('20130101', periods=6)
dates
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
'2013-01-05', '2013-01-06'],
dtype='datetime64[ns]', freq='D')
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df
                   A         B         C         D
2013-01-01  0.109445  0.755078 -0.392128  0.686618
2013-01-02  0.819996  1.133078  1.240428 -0.598558
2013-01-03 -0.517225  1.060575  1.959101  0.624972
2013-01-04 -0.630220 -0.686336  0.396429 -2.254826
2013-01-05  1.685801 -1.213059  1.214066  0.422229
2013-01-06  0.411593  0.672590  0.690132  0.284466
df2 = pd.DataFrame({'A':1.0,'B': pd.Timestamp('20130102'),
'C': pd.Series(1, index=list(range(4)), dtype='float32'),
'D': np.array([3] * 4, dtype='int32'),
'E': pd.Categorical(["test", "train", "test", "train"]),
'F': 'foo'})
df2
     A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo
df2.dtypes
A float64
B datetime64[ns]
C float32
D int32
E category
F object
dtype: object
df2.A
0 1.0
1 1.0
2 1.0
3 1.0
Name: A, dtype: float64
df2.bool
<bound method NDFrame.bool of      A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo>
2.3.2 Viewing data
df.head()
                   A         B         C         D
2013-01-01  0.109445  0.755078 -0.392128  0.686618
2013-01-02  0.819996  1.133078  1.240428 -0.598558
2013-01-03 -0.517225  1.060575  1.959101  0.624972
2013-01-04 -0.630220 -0.686336  0.396429 -2.254826
2013-01-05  1.685801 -1.213059  1.214066  0.422229
df.tail(3)
                   A         B         C         D
2013-01-04 -0.630220 -0.686336  0.396429 -2.254826
2013-01-05  1.685801 -1.213059  1.214066  0.422229
2013-01-06  0.411593  0.672590  0.690132  0.284466
df.index
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
'2013-01-05', '2013-01-06'],
dtype='datetime64[ns]', freq='D')
df.columns
Index(['A', 'B', 'C', 'D'], dtype='object')
df.to_numpy()
array([[ 0.10944515, 0.75507818, -0.39212779, 0.68661786],
[ 0.8199961 , 1.13307764, 1.24042756, -0.59855844],
[-0.51722489, 1.06057526, 1.95910122, 0.62497238],
[-0.63022037, -0.68633565, 0.39642911, -2.25482556],
[ 1.6858012 , -1.21305901, 1.21406631, 0.4222287 ],
[ 0.41159282, 0.67259021, 0.69013212, 0.28446607]])
df3 = df.to_numpy()
df3
array([[ 0.10944515, 0.75507818, -0.39212779, 0.68661786],
[ 0.8199961 , 1.13307764, 1.24042756, -0.59855844],
[-0.51722489, 1.06057526, 1.95910122, 0.62497238],
[-0.63022037, -0.68633565, 0.39642911, -2.25482556],
[ 1.6858012 , -1.21305901, 1.21406631, 0.4222287 ],
[ 0.41159282, 0.67259021, 0.69013212, 0.28446607]])
df3.dtype
dtype('float64')
df22 = df2.to_numpy()
df22.dtype
dtype('O')
df22
array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
dtype=object)
df.describe()
              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean   0.313232  0.286988  0.851338 -0.139183
std    0.868530  0.987887  0.811078  1.135563
min   -0.630220 -1.213059 -0.392128 -2.254826
25%   -0.360557 -0.346604  0.469855 -0.377802
50%    0.260519  0.713834  0.952099  0.353347
75%    0.717895  0.984201  1.233837  0.574286
max    1.685801  1.133078  1.959101  0.686618
df.T
   2013-01-01  2013-01-02  2013-01-03  2013-01-04  2013-01-05  2013-01-06
A    0.109445    0.819996   -0.517225   -0.630220    1.685801    0.411593
B    0.755078    1.133078    1.060575   -0.686336   -1.213059    0.672590
C   -0.392128    1.240428    1.959101    0.396429    1.214066    0.690132
D    0.686618   -0.598558    0.624972   -2.254826    0.422229    0.284466
ディスカッション
コメント一覧
まだ、コメントがありません