Python数据分析实践
Python数据分析实践 (26).pdf
2021/11/21 下午4:36    5-1-2    file:/C:/Users/sgl/Downloads/5-1-2.html    1/2

In:
import pandas as pd
pd.set_option('display.unicode.east_asian_width',True)  #解决数据输出时列名不对齐的问题
df=pd.read_excel('tdata/cj.xlsx')  #读取数据

5.1.2 数据清洗

- 缺失值处理

In:
import pandas as pd
pd.set_option('display.unicode.east_asian_width',True)  #解决数据输出时列名不对齐的问题
df=pd.read_excel('tdata/cj.xlsx')  #读取数据

In:
#存在任一缺失值即删除
df1=df.dropna()
print('删除前:',df.shape)
print('删除后:',df1.shape)

In:
#所有列均为缺失值即删除
df1=df.dropna(how='all')
print('删除前:',df.shape)
print('删除后:',df1.shape)

In:
#指定列均为缺失值即删除
df1=df.dropna(how='all',subset=['专业','选修'])
print('删除前:',df.shape)
print('删除后:',df1.shape)

In:
#保留某些属性中不存在缺失值的情况
df1=df[df['性别'].notnull()]
print('删除前:',df.shape)
print('删除后:',df1.shape)

In:
#将缺失值NaN填充为0
df['选修'].fillna(0)

In:
#将缺失值NaN填充与后面的值相同
df['选修'].fillna(method='bfill')

2021/11/21 下午4:36    5-1-2    file:/C:/Users/sgl/Downloads/5-1-2.html    2/2

In:
import numpy as np
#将缺失值NaN填充选修课的平均分
df['选修'].fillna(np.mean(df['选修']))

- 重复值处理

In:
#去除全部重复数据
df1=df.drop_duplicates()
print('去重前:',df.shape)
print('去重后:',df1.shape)

In:
#去除指定列中重复数据
df1=df.drop_duplicates(['专业'])
print('去重前:',df.shape)
print('去重后:',df1.shape)

In:
#去除指定列中重复数据,设置keep参数
df1=df.drop_duplicates(['专业'],keep='last')
print('去重前:',df.shape)
print('去重后:',df1.shape)

In:
#去除指定若干列中重复数据
df1=df.drop_duplicates(['学号','姓名'])
print('去重前:',df.shape)
print('去重后:',df1.shape)