#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Python-Pandas Notes
"""
PANDAS
Video link: []
"""
"""
---------PLOTTING MULTIPLE KINDS OF GRAPHS IN ONE FIGURE-----------------
"""
import pandas as pd
import matplotlib.pyplot as plt
"""
---------SERIES--------------
"""
# Build a labelled Series: the index supplies the labels, data the values.
z = pd.Series(data=[1, 2, 3], index=['a', 'b', 'g'])
print(z)
# Printed output:
# a    1
# b    2
# g    3
# dtype: int64
"""
--------DATA FRAME (MAKING A TABLE: defining rows and columns)--------
We can make a table using pandas by creating a dictionary that maps
each column heading to the list of values in that column.
"""
# Define a data frame from a dictionary: each key is a column heading and
# its list holds that column's contents. The rows get a default integer
# index (0, 1, 2, ...).
df = pd.DataFrame({
    'Province': ['S', 'P', 'B', 'KPK'],
    'Population': [1000, 2000, 3000, 4566],
})
print(df)
print(df.Population)  # attribute access: the Population column only
# .loc[0] returns the row labelled 0, i.e. the first province and its
# corresponding population -- in this case Province 'S', Population 1000.
print(df.loc[0])
print(df['Province'])  # bracket access: another way of getting a column

df.columns = ['Pov', 'Pops']  # this changes the column headings
df.index = ['a', 'b', 'c', 'd']  # change the index from numbers to labels
# The population column is now called 'Pops'. `df.pop` would be the
# DataFrame.pop *method*, not a column, so select the column by its name.
# The same bracket access works for 'Pov'.
print(df['Pops'])
print(df.loc['a'])  # row 'a': Pov 'S', Pops 1000
# info() prints its summary itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
df.info()
"""
---------WRITING TO THE FILES--------
"""
df = pd.DataFrame({
    'Province': ['S', 'P', 'B', 'KPK'],
    'Population': [1000, 2000, 3000, 4566],
})
# to_excel() writes the workbook and returns None, so df_1 and df_2 do not
# hold any data; the names are kept only so later references still resolve.
# Both calls target 'test.xlsx', so the second write replaces the first.
df_1 = df.to_excel('test.xlsx')  # the row index is written as a column
# index must be the boolean False. The string 'False' is non-empty and
# therefore truthy, which would leave the index in the file.
df_2 = df.to_excel('test.xlsx', index=False)  # the index will not appear
"""
---------MAKE CHANGES IN EXCEL FILE---------
"""
data = pd.read_excel('sample.xlsx')
# Set an index from the column(s) you want to look rows up by (better if
# the column is not a number). Passing two columns makes both of them index
# levels side by side; sort_index(level=0) then orders the rows by the
# first level, 'Ship Mode'.
data_new = data.set_index(['Ship Mode', 'Customer']).sort_index(level=0)
# To find the values and data within one index label:
print(data_new.loc['Regular Air'])
# With two index levels you can give one label per level:
print(data_new.loc['Regular Air', 'Barry French'])

# Plot into the left-hand panel of a 1x2 figure.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
# NOTE(review): customer_count was never defined in these notes, so the plot
# call below raised NameError. Counting rows per customer is a guess at the
# intended data -- confirm against the original tutorial.
customer_count = data['Customer'].value_counts()
customer_count.plot(ax=axes[0], kind='bar')  # bar chart of customer count
axes[0].set_ylabel('customer')  # set the y label
"""
-------ADDRESS MISSING NUMBERS-----------
"""
sample2 = pd.read_excel('sample2.xlsx')
# isnull() flags every entry: True means that value is null (missing).
print(sample2['Unit Price'].isnull())
# Fill the missing numbers with 0. fillna() returns a new Series and
# sample2 itself is left unchanged.
print(sample2['Unit Price'].fillna(0))
# Forward fill: each missing value takes the last valid value before it.
# ffill() is called without arguments because its parameters are
# keyword-only in pandas 2.x, so the old positional ffill(0) raises TypeError.
print(sample2['Unit Price'].ffill())
"""
---------DRAWING GRAPHS OF DATA FRAME(2D FIGS)-----------
"""
# NOTE(review): s1 was never defined in these notes, so the lines below
# raised NameError. A placeholder two-element Series is created here so the
# relabelling runs -- replace it with the intended data.
s1 = pd.Series([1, 2])
s1.index = ['a', 'b']  # relabel the two entries; lengths must match
print(s1)
"""
-----CONVERTING STRINGS TO NUMBERS (by adding new columns)-----
Make new columns by editing the existing ones.
"""
# lambda tells Python that we are defining a small anonymous function.
# It is bound to a name here only to show the syntax.
y = lambda x: x**2 + 1
k = pd.DataFrame({
    'Cities': pd.Series(['khi', 'lahore']),
    'Age': pd.Series(['30', '20']),
})
# Add a new column with a lambda: for each Age string, remove any commas
# and convert the result to int. The existing Age column is left unchanged;
# NuAge is a new column holding the edited copy.
k['NuAge'] = k.Age.apply(lambda x: int(x.replace(',', '')))
"""
--------ORGANIZING DATA------
To tidy a particular column, strip the leading and trailing whitespace
from each of its entries.
"""
# .str.strip() is the vectorised form of apply(lambda x: x.strip()): it
# removes leading/trailing whitespace from every entry, and it leaves
# missing values as NaN where the lambda would raise on them.
k['Cities'] = k.Cities.str.strip()