Pandas Datetime4
Pandas Datetime4
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
warnings.filterwarnings('ignore')
data
# Current local time as a pandas Timestamp. The old `pd.datetime` alias was
# deprecated in pandas 1.0 and removed in 2.0, so `pd.datetime.now()` raises
# AttributeError on current pandas; Timestamp exposes the same .month/.year.
x = pd.Timestamp.now()
x.month, x.year
(9, 2023)
Out[3]:
In [5]: # 4: To get the current time, call pd.Timestamp.now(); the resulting Timestamp can then be converted to a standard datetime with .to_pydatetime()
Timestamp('2023-09-27 11:25:05.441747')
Out[6]:
In [7]: # Convert timestamp to datetime
t.to_pydatetime()
In [8]: t.year
2023
Out[8]:
In [9]: t.month
9
Out[9]:
In [10]: t.day
27
Out[10]:
In [11]: t.hour
11
Out[11]:
In [12]: t.minute
25
Out[12]:
In [13]: t.second
5
Out[13]:
2020-
1241928 United Kingdom Anguilla 18.220600 -63.068600 13.0 12.0 0.0
12-31
2020-
1241929 United Kingdom Bermuda 32.307800 -64.750500 604.0 445.0 10.0
12-31
2020-
1241930 United Kingdom British Virgin Islands 18.420700 -64.640000 86.0 74.0 1.0
12-31
2020-
1241931 United Kingdom Cayman Islands 19.313300 -81.254600 338.0 294.0 2.0
12-31
2020-
1241932 United Kingdom Channel Islands 49.372300 -2.364400 3058.0 2256.0 58.0
12-31
2020-
1241933 United Kingdom England 52.355500 -1.174300 2139956.0 0.0 64118.0
12-31
2020-
1241935 United Kingdom Gibraltar 36.140800 -5.353600 2040.0 1238.0 7.0
12-31
1241936 United Kingdom Isle of Man 54.236100 -4.548100 377.0 348.0 25.0 2020-
12-31
2020-
1241937 United Kingdom Montserrat 16.742498 -62.187366 13.0 12.0 1.0
12-31
2020-
1241938 United Kingdom Northern Ireland 54.787700 -6.492300 72834.0 0.0 1322.0
12-31
2020-
1241939 United Kingdom Scotland 56.490700 -4.202600 127453.0 0.0 4578.0
12-31
2020-
1241941 United Kingdom Unknown 32.307800 -59.523600 0.0 0.0 0.0
12-31
2020-
1241942 United Kingdom Wales 52.130700 -3.783700 148537.0 0.0 3494.0
12-31
2020-
1241943 Uruguay NaN -32.522800 -55.765800 19119.0 13468.0 181.0
12-31
2020-
1241944 Uzbekistan NaN 41.377491 64.585262 77060.0 74943.0 614.0
12-31
2020-
1241945 Vanuatu NaN -15.376700 166.959200 1.0 1.0 0.0
12-31
2020-
1241946 Venezuela NaN 6.423800 -66.589700 113558.0 107583.0 1028.0
12-31
2020-
1241947 Vietnam NaN 14.058324 108.277199 1465.0 1325.0 35.0
12-31
2020-
1241949 Yemen NaN 15.552727 48.516388 2099.0 1394.0 610.0
12-31
2020-
1241950 Zambia NaN -13.133897 27.849332 20725.0 18660.0 388.0
12-31
2020-
1241951 Zimbabwe NaN -19.015438 29.154857 13867.0 11250.0 363.0
12-31
In [15]: df=df.drop(columns=['Province/State','Latitude','Longitude'])
df.head()
In [16]: df.Date.unique()
array(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
Out[16]:
'2021-01-05', '2021-01-06', '2021-01-07', '2021-01-08',
'2021-01-09', '2021-01-10', '2021-01-11', '2021-01-12',
'2021-01-13', '2021-01-14', '2021-01-15', '2021-01-16',
'2021-01-17', '2021-01-18', '2021-01-19', '2021-01-20',
'2021-01-21', '2020-01-22', '2021-01-22', '2020-01-23',
'2021-01-23', '2020-01-24', '2021-01-24', '2020-01-25',
'2021-01-25', '2020-01-26', '2021-01-26', '2020-01-27',
'2021-01-27', '2020-01-28', '2021-01-28', '2020-01-29',
'2021-01-29', '2020-01-30', '2021-01-30', '2020-01-31',
'2021-01-31', '2020-02-01', '2021-02-01', '2020-02-02',
'2021-02-02', '2020-02-03', '2021-02-03', '2020-02-04',
'2021-02-04', '2020-02-05', '2021-02-05', '2020-02-06',
'2021-02-06', '2020-02-07', '2021-02-07', '2020-02-08',
'2021-02-08', '2020-02-09', '2021-02-09', '2020-02-10',
'2021-02-10', '2020-02-11', '2021-02-11', '2020-02-12',
'2021-02-12', '2020-02-13', '2020-02-14', '2020-02-15',
'2020-02-16', '2020-02-17', '2020-02-18', '2020-02-19',
'2020-02-20', '2020-02-21', '2020-02-22', '2020-02-23',
'2020-02-24', '2020-02-25', '2020-02-26', '2020-02-27',
'2020-02-28', '2020-02-29', '2020-03-01', '2020-03-02',
'2020-03-03', '2020-03-04', '2020-03-05', '2020-03-06',
'2020-03-07', '2020-03-08', '2020-03-09', '2020-03-10',
'2020-03-11', '2020-03-12', '2020-03-13', '2020-03-14',
'2020-03-15', '2020-03-16', '2020-03-17', '2020-03-18',
'2020-03-19', '2020-03-20', '2020-03-21', '2020-03-22',
'2020-03-23', '2020-03-24', '2020-03-25', '2020-03-26',
'2020-03-27', '2020-03-28', '2020-03-29', '2020-03-30',
'2020-03-31', '2020-04-01', '2020-04-02', '2020-04-03',
'2020-04-04', '2020-04-05', '2020-04-06', '2020-04-07',
'2020-04-08', '2020-04-09', '2020-04-10', '2020-04-11',
'2020-04-12', '2020-04-13', '2020-04-14', '2020-04-15',
'2020-04-16', '2020-04-17', '2020-04-18', '2020-04-19',
'2020-04-20', '2020-04-21', '2020-04-22', '2020-04-23',
'2020-04-24', '2020-04-25', '2020-04-26', '2020-04-27',
'2020-04-28', '2020-04-29', '2020-04-30', '2020-05-01',
'2020-05-02', '2020-05-03', '2020-05-04', '2020-05-05',
'2020-05-06', '2020-05-07', '2020-05-08', '2020-05-09',
'2020-05-10', '2020-05-11', '2020-05-12', '2020-05-13',
'2020-05-14', '2020-05-15', '2020-05-16', '2020-05-17',
'2020-05-18', '2020-05-19', '2020-05-20', '2020-05-21',
'2020-05-22', '2020-05-23', '2020-05-24', '2020-05-25',
'2020-05-26', '2020-05-27', '2020-05-28', '2020-05-29',
'2020-05-30', '2020-05-31', '2020-06-01', '2020-06-02',
'2020-06-03', '2020-06-04', '2020-06-05', '2020-06-06',
'2020-06-07', '2020-06-08', '2020-06-09', '2020-06-10',
'2020-06-11', '2020-06-12', '2020-06-13', '2020-06-14',
'2020-06-15', '2020-06-16', '2020-06-17', '2020-06-18',
'2020-06-19', '2020-06-20', '2020-06-21', '2020-06-22',
'2020-06-23', '2020-06-24', '2020-06-25', '2020-06-26',
'2020-06-27', '2020-06-28', '2020-06-29', '2020-06-30',
'2020-07-01', '2020-07-02', '2020-07-03', '2020-07-04',
'2020-07-05', '2020-07-06', '2020-07-07', '2020-07-08',
'2020-07-09', '2020-07-10', '2020-07-11', '2020-07-12',
'2020-07-13', '2020-07-14', '2020-07-15', '2020-07-16',
'2020-07-17', '2020-07-18', '2020-07-19', '2020-07-20',
'2020-07-21', '2020-07-22', '2020-07-23', '2020-07-24',
'2020-07-25', '2020-07-26', '2020-07-27', '2020-07-28',
'2020-07-29', '2020-07-30', '2020-07-31', '2020-08-01',
'2020-08-02', '2020-08-03', '2020-08-04', '2020-08-05',
'2020-08-06', '2020-08-07', '2020-08-08', '2020-08-09',
'2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13',
'2020-08-14', '2020-08-15', '2020-08-16', '2020-08-17',
'2020-08-18', '2020-08-19', '2020-08-20', '2020-08-21',
'2020-08-22', '2020-08-23', '2020-08-24', '2020-08-25',
'2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29',
'2020-08-30', '2020-08-31', '2020-09-01', '2020-09-02',
'2020-09-03', '2020-09-04', '2020-09-05', '2020-09-06',
'2020-09-07', '2020-09-08', '2020-09-09', '2020-09-10',
'2020-09-11', '2020-09-12', '2020-09-13', '2020-09-14',
'2020-09-15', '2020-09-16', '2020-09-17', '2020-09-18',
'2020-09-19', '2020-09-20', '2020-09-21', '2020-09-22',
'2020-09-23', '2020-09-24', '2020-09-25', '2020-09-26',
'2020-09-27', '2020-09-28', '2020-09-29', '2020-09-30',
'2020-10-01', '2020-10-02', '2020-10-03', '2020-10-04',
'2020-10-05', '2020-10-06', '2020-10-07', '2020-10-08',
'2020-10-09', '2020-10-10', '2020-10-11', '2020-10-12',
'2020-10-13', '2020-10-14', '2020-10-15', '2020-10-16',
'2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20',
'2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24',
'2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28',
'2020-10-29', '2020-10-30', '2020-10-31', '2020-11-01',
'2020-11-02', '2020-11-03', '2020-11-04', '2020-11-05',
'2020-11-06', '2020-11-07', '2020-11-08', '2020-11-09',
'2020-11-10', '2020-11-11', '2020-11-12', '2020-11-13',
'2020-11-14', '2020-11-15', '2020-11-16', '2020-11-17',
'2020-11-18', '2020-11-19', '2020-11-20', '2020-11-21',
'2020-11-22', '2020-11-23', '2020-11-24', '2020-11-25',
'2020-11-26', '2020-11-27', '2020-11-28', '2020-11-29',
'2020-11-30', '2020-12-01', '2020-12-02', '2020-12-03',
'2020-12-04', '2020-12-05', '2020-12-06', '2020-12-07',
'2020-12-08', '2020-12-09', '2020-12-10', '2020-12-11',
'2020-12-12', '2020-12-13', '2020-12-14', '2020-12-15',
'2020-12-16', '2020-12-17', '2020-12-18', '2020-12-19',
'2020-12-20', '2020-12-21', '2020-12-22', '2020-12-23',
'2020-12-24', '2020-12-25', '2020-12-26', '2020-12-27',
'2020-12-28', '2020-12-29', '2020-12-30', '2020-12-31'],
dtype=object)
In [17]: df['Date']=pd.to_datetime(df.Date)
df.head()
In [18]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1241952 entries, 0 to 1241951
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Country/Region 1070891 non-null object
1 Confirmed 1241933 non-null float64
2 Recovered 1241566 non-null float64
3 Deaths 1241520 non-null float64
4 Date 1241952 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(3), object(1)
memory usage: 47.4+ MB
In [20]: df.month.replace({
1:'January',
2:'February',
3:'March',
4:'April',
5:'May',
6:'June',
7:'July',
8:'August',
9:'September',
10:'October',
11:'November',
12:'December'
},inplace=True)
df.sample(10)
EDA
Rename the column Country/Region to Region.
In [21]: df.rename(columns={'Country/Region':'Region'},inplace=True)
df.head()
In [22]: df.query('month=="January"')['Recovered'].sum()
1614355505.0
Out[22]:
In [23]: df.query('month=="March"')['Confirmed'].sum()
8900596.0
Out[23]:
In [27]: tot_death=df.groupby(['Region'])['Deaths'].sum()
tot_death
Region
Out[27]:
Afghanistan 297323.0
Albania 88375.0
Algeria 377806.0
Andorra 15492.0
Angola 36818.0
...
Vietnam 4908.0
West Bank and Gaza 74931.0
Yemen 108944.0
Zambia 53042.0
Zimbabwe 36683.0
Name: Deaths, Length: 218, dtype: float64
In [28]: df.columns
Index(['Region', 'Confirmed', 'Recovered', 'Deaths', 'Date', 'year', 'month',
Out[28]:
'day'],
dtype='object')
In [29]: tot_death.max()
46511368.0
Out[29]:
In [30]: tot_death.mean()
1024449.7889908256
Out[30]:
In [31]: df.query('Region=="China"')['Deaths'].sum()
1415355.0
Out[31]:
In [32]: df.query('Region=="India"')['Recovered'].sum()  # call sum(); without the parentheses the cell only echoes the bound method, not the total
In [33]: mean_sum=df.groupby(['Region'])['Confirmed'].agg(['mean','sum'])
mean_sum
Region
In [34]: mean_sum1=df.groupby(['Region'])['Recovered'].agg(['mean','sum'])
mean_sum1
Region
In [35]: mean_sum1=df.groupby(['Region'])['Deaths'].agg(['mean','sum'])
# Per-region mean and sum of the 'Deaths' column.
# NOTE(review): this rebinds mean_sum1, which In [34] assigned to the
# 'Recovered' aggregate, so that earlier result is no longer reachable by
# name after this cell runs -- consider a distinct name if both are needed.
mean_sum1
Region
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1241952 entries, 0 to 1241951
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Region 1070891 non-null object
1 Confirmed 1241933 non-null float64
2 Recovered 1241566 non-null float64
3 Deaths 1241520 non-null float64
4 Date 1241952 non-null datetime64[ns]
5 year 1241952 non-null int64
6 month 1241952 non-null object
7 day 1241952 non-null int64
dtypes: datetime64[ns](1), float64(3), int64(2), object(2)
memory usage: 75.8+ MB
In [ ]: