# Python pandas库的用法 (usage notes for the Python pandas library)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
 
# Sample values shared by the construction examples below.
_lower = ["a", "b", "c", "d"]
_upper = ["A", "B", "C", "D"]

# Three ways to build a Series; each assignment replaces the previous one.
# 1) From a plain list (default integer index).
S1 = pd.Series(data=list(_lower))
# 2) From a list of values plus explicit index labels.
S1 = pd.Series(data=[1, 2, 3, 4], index=list(_lower))
# 3) From a dict: the keys become the index labels.
S1 = pd.Series(data=dict(zip(_lower, [1, 2, 3, 4])))
print(S1.index, S1.values, sep="\n")

# DataFrame from a list of rows, keeping the default integer labels:
# two rows holding four values each.
df1 = pd.DataFrame(data=[list(_lower), list(_upper)])
print(df1)
# Four rows, each a [lowercase, uppercase] pair.
df2 = pd.DataFrame(data=[[lo, up] for lo, up in zip(_lower, _upper)])
print(df2)

# The call below raises an error: every tuple is passed as its own positional
# argument instead of all rows being wrapped in a single list.
# df3 = pd.DataFrame(("a", "A"), ("b", "B"), ("c", "C"), ("d", "D"))
# print(df3)

# First three [lowercase, uppercase] pairs, reused by the next two examples.
_first_three = [[lo, up] for lo, up in zip(_lower[:3], _upper[:3])]

# Good example: set the column labels with `columns`.
df4 = pd.DataFrame(data=_first_three, columns=["小写", "大写"])
print(df4)

# Good example: set the row labels with `index`.
df5 = pd.DataFrame(data=_first_three, index=["一", "二", "三"])
print(df5)

# Dict of column name -> column values.
data = {"小写": list(_lower), "大写": list(_upper)}
df6 = pd.DataFrame(data=data)
print(df6)

# Same dict, this time with explicit row labels.
df7 = pd.DataFrame(data=data, index=["(1)", "(2)", "(3)", "(4)"])
print(df7)

# Column labels, then row labels.
print(df7.columns, df7.index, sep="\n")
import pandas as pd
 
# Workbook used by every example below; defined once so the path is not
# repeated in each read_excel call.
EXCEL_PATH = r"C:\Users\clarkhu\Desktop\py\9月数据.xls"

# Read an Excel workbook.
# The second parameter of read_excel (sheet_name) is a sheet name or a
# zero-based sheet index.
df = pd.read_excel(EXCEL_PATH)
print(df)

# When index_col is given, that column becomes the row index.
df = pd.read_excel(EXCEL_PATH, sheet_name=0, index_col=2)
print(df)
# Read again without index_col, so df goes back to the default row index.
df = pd.read_excel(EXCEL_PATH)
print(df)
# Select the sheet by name instead of by position.
df = pd.read_excel(EXCEL_PATH, sheet_name="10月")
print(df)

# Show the first rows (head() returns 5 rows by default).
s = df.head()
print(s)

# Print (number of rows, number of columns).
print(df.shape)

# Print per-column type and related information; info() writes its own output.
df.info()

# Missing-value mask: True where a value is missing, False otherwise.
s = df.isnull()
print(s)
print(s.shape)

# dropna() removes every row that contains a NaN.
g1 = df.dropna()
print(g1)

# Remove only the rows whose listed column(s) contain a NaN.
g2 = df.dropna(subset=["性别"])
print(g2)

# Remove only the rows in which every value is NaN.
g3 = df.dropna(how="all")
print(g3)

# Fill missing values, using a separate fill value for each column.
print('==========g4============')
g4 = g3.fillna({"性别": "男", "手机号": "123456789"})
print(g4)

# Duplicate handling. drop_duplicates() returns a new DataFrame and leaves df
# untouched, so each result is printed rather than silently discarded.
print('xxxxxxxxxxxxxxx')
# Drop rows that are identical in every column.
print(df.drop_duplicates())
# Drop rows that repeat within a single column. A column label that does not
# exist in the sheet raises KeyError, so this uses "昵称", the same column the
# examples below depend on.
print(df.drop_duplicates(subset="昵称"))
# keep="first" (the default) keeps the first occurrence, keep="last" keeps the
# last one, and keep=False drops every row that has a duplicate.
print(df.drop_duplicates(subset=["昵称", "性别"], keep="last"))
print(df.drop_duplicates(subset=["昵称", "性别"], keep=False))