WEBINTEL GUIDED LAB ACTIVITY Introduction To Pandas
WEBINTEL GUIDED LAB ACTIVITY Introduction To Pandas
In [ ]: # The shape attribute of pandas.DataFrame stores the number of rows and columns
# as a tuple (number of rows, number of columns).
df.shape
In [ ]: df.shape()
In [ ]: # select the 'country' column on its own (single-bracket selection returns a pandas Series)
country_df = df['country']
In [ ]: # create a new data frame that stores only the following columns: country, continent and year
subset = df[['country', 'continent', 'year']]
In [ ]: # loc is label-based, which means that you have to specify rows and columns based
# on their row and column labels
df.loc[2]
In [ ]: # passing a list of labels to loc selects several rows at once, returned in the order listed
df.loc[[2, 0]]
In [ ]: # iloc is integer index based, so you have to specify rows and columns by their integer index
df.iloc[2]
In [ ]: # ix indexer was an early addition to the library that allowed for flexibility selecting rows and columns
# by either integer location or by label. DEPRICATED
df.ix[2]
In [ ]: # create a new data frame that stores all the observations (rows) from the columns year and pop
subset = df.loc[:, ['year', 'pop']]
In [ ]: # display the year and pop columns for the rows where year is equal to 1967
df.loc[df['year'] == 1967, ['year', 'pop']]
In [ ]: # display the year and pop columns for the rows where year is equal to 1967 and pop > 1,000,000
df.loc[(df['year'] == 1967) & (df['pop'] > 1_000_000),
['year', 'pop']]
In [ ]: # display the rows using for loop with specify column name
for index, row in df.iterrows():
print(index, row['country'])
In [ ]: # The describe() method is used for calculating some statistical data like percentile,
# mean and std of the numerical values of the Series or DataFrame.
df.describe()
In [ ]: # display the data frame sorted in descending order by the column 'country'
df.sort_values('country', ascending = False)
In [ ]: # display the data frame using the two columns 'country' (A-Z) and 'pop' (High - Low)
df.sort_values(['country','pop'], ascending = [1,0])
In [ ]: # head() shows the first rows of the data frame (5 by default)
df.head()
In [ ]: # create and save a new text file, without the index, separated by tabs
df.to_csv('modified.txt', index= False, sep = '\t')
In [ ]: # filter the data frame to the rows whose country does not contain 'Afg'
df.loc[~df['country'].str.contains('Afg')]
In [ ]: df = pandas.read_csv('modified.csv')
In [ ]: df
In [ ]: # add a helper column named 'count' that holds the value 1 in every row
df['count'] = 1
df
In [ ]: df.groupby(['continent']).count()['count']
In [ ]: