pandas 2

2.3.10 pandas 1

import numpy as np
import pandas as pd
%matplotlib inline

pd.__version__

'1.0.1'

2.3.1 Object creation

# Build a Series from a plain list; np.nan marks the missing entry.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s
0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64
# Six consecutive daily timestamps starting on 2013-01-01.
dates = pd.date_range(start='20130101', periods=6, freq='D')
dates
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
 df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df
                   A         B         C         D
2013-01-01  0.109445  0.755078 -0.392128  0.686618
2013-01-02  0.819996  1.133078  1.240428 -0.598558
2013-01-03 -0.517225  1.060575  1.959101  0.624972
2013-01-04 -0.630220 -0.686336  0.396429 -2.254826
2013-01-05  1.685801 -1.213059  1.214066  0.422229
2013-01-06  0.411593  0.672590  0.690132  0.284466
# Each dict entry becomes one column; the scalar entries ('A', 'B', 'F')
# are repeated to match the four-row columns.
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)
df2
     A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo
df2.dtypes
A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object
df2.A
0    1.0
1    1.0
2    1.0
3    1.0
Name: A, dtype: float64
df2.bool
<bound method NDFrame.bool of      A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo>

2.3.2 Viewing data

df.head()
                   A         B         C         D
2013-01-01  0.109445  0.755078 -0.392128  0.686618
2013-01-02  0.819996  1.133078  1.240428 -0.598558
2013-01-03 -0.517225  1.060575  1.959101  0.624972
2013-01-04 -0.630220 -0.686336  0.396429 -2.254826
2013-01-05  1.685801 -1.213059  1.214066  0.422229
df.tail(3)
                   A         B         C         D
2013-01-04 -0.630220 -0.686336  0.396429 -2.254826
2013-01-05  1.685801 -1.213059  1.214066  0.422229
2013-01-06  0.411593  0.672590  0.690132  0.284466
df.index
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
df.columns
Index(['A', 'B', 'C', 'D'], dtype='object')
df.to_numpy()
array([[ 0.10944515,  0.75507818, -0.39212779,  0.68661786],
       [ 0.8199961 ,  1.13307764,  1.24042756, -0.59855844],
       [-0.51722489,  1.06057526,  1.95910122,  0.62497238],
       [-0.63022037, -0.68633565,  0.39642911, -2.25482556],
       [ 1.6858012 , -1.21305901,  1.21406631,  0.4222287 ],
       [ 0.41159282,  0.67259021,  0.69013212,  0.28446607]])
# Keep the converted array so its dtype can be inspected; every column of df
# is float, and the resulting array is float64.
df3 = df.to_numpy()
df3
array([[ 0.10944515,  0.75507818, -0.39212779,  0.68661786],
       [ 0.8199961 ,  1.13307764,  1.24042756, -0.59855844],
       [-0.51722489,  1.06057526,  1.95910122,  0.62497238],
       [-0.63022037, -0.68633565,  0.39642911, -2.25482556],
       [ 1.6858012 , -1.21305901,  1.21406631,  0.4222287 ],
       [ 0.41159282,  0.67259021,  0.69013212,  0.28446607]])
df3.dtype
dtype('float64')
# df2 mixes float, datetime, int, categorical and string columns, so the
# converted array comes back with object dtype.
df22 = df2.to_numpy()
df22.dtype
dtype('O')
df22
array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)
df.describe()
              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean   0.313232  0.286988  0.851338 -0.139183
std    0.868530  0.987887  0.811078  1.135563
min   -0.630220 -1.213059 -0.392128 -2.254826
25%   -0.360557 -0.346604  0.469855 -0.377802
50%    0.260519  0.713834  0.952099  0.353347
75%    0.717895  0.984201  1.233837  0.574286
max    1.685801  1.133078  1.959101  0.686618
df.T
   2013-01-01  2013-01-02  2013-01-03  2013-01-04  2013-01-05  2013-01-06
A    0.109445    0.819996   -0.517225   -0.630220    1.685801    0.411593
B    0.755078    1.133078    1.060575   -0.686336   -1.213059    0.672590
C   -0.392128    1.240428    1.959101    0.396429    1.214066    0.690132
D    0.686618   -0.598558    0.624972   -2.254826    0.422229    0.284466

Follow me!