# --- Load the funnel event log --------------------------------------------
# The first CSV column is used as the row index rather than as a data column.
df = pd.read_csv("df_funnel.csv", index_col=0)

# --- Size -----------------------------------------------------------------
df.shape
# Recorded output: (301861, 7)

# --- Column overview ------------------------------------------------------
df.info()
# Recorded output:
#   <class 'pandas.core.frame.DataFrame'>
#   Int64Index: 301861 entries, 0 to 301860
#   Data columns (total 7 columns):
#     actiontype          301861 non-null object
#     ismydoc             301861 non-null object
#     ext                 301861 non-null object
#     sessionid           301861 non-null object
#     documentposition    301861 non-null object
#     datetime            301861 non-null object
#     screen              301861 non-null object
#   dtypes: object(7)
#   memory usage: 18.4+ MB
# NOTE: every column, including `datetime`, was loaded with dtype `object`.

# --- First rows -----------------------------------------------------------
df.head()  # head() shows 5 rows by default
# Recorded output:
#      actiontype  ismydoc  ext  sessionid                         documentposition  datetime   screen
#   0  OPEN        NoView   PDF  9400fd2e43d7dc2d054ca78806236ee1  LOCALSTORAGE      2016.7.18  Per_Dir
#   1  CLOSE       NoView   PDF  9400fd2e43d7dc2d054ca78806236ee1  LOCALSTORAGE      2016.7.18  Per_Dir
#   2  OPEN        View     PDF  9400fd2e43d7dc2d054ca78806236ee1  MYPOLARISDRIVE    2016.7.18  Pub_Dir
#   3  CLOSE       View     PDF  9400fd2e43d7dc2d054ca78806236ee1  MYPOLARISDRIVE    2016.7.18  Pub_Dir
#   4  OPEN        NoView   PDF  f191063c562691041dfa935ff0876975  OTHERAPP          2016.7.6   Main

# --- Missing values per column --------------------------------------------
df.isnull().sum()
# Recorded output (no column contains missing values):
#   actiontype          0
#   ismydoc             0
#   ext                 0
#   sessionid           0
#   documentposition    0
#   datetime            0
#   screen              0
#   dtype: int64