import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Walkthrough of the basic ways to construct pandas Series and DataFrame
# objects and to inspect their row/column labels.

# --- Series ---------------------------------------------------------------
# Three ways to build a Series. Each assignment rebinds S1, so only the last
# one (built from a dict) is inspected by the prints below.
S1 = pd.Series(["a", "b", "c", "d"])  # values only
S1 = pd.Series([1, 2, 3, 4], index=["a", "b", "c", "d"])  # explicit labels
S1 = pd.Series({"a": 1, "b": 2, "c": 3, "d": 4})  # dict keys are the labels
print(S1.index)
print(S1.values)

# --- DataFrame from nested lists (default numeric labels) -------------------
# Each inner list is one row.
df1 = pd.DataFrame([["a", "b", "c", "d"], ["A", "B", "C", "D"]])
print(df1)

df2 = pd.DataFrame([["a", "A"], ["b", "B"], ["c", "C"], ["d", "D"]])
print(df2)

# Rows may also be tuples, but they must be wrapped in ONE list. The earlier
# attempt passed the four tuples as separate positional arguments, which the
# constructor interprets as (data, index, columns, dtype) and rejects.
df3 = pd.DataFrame([("a", "A"), ("b", "B"), ("c", "C"), ("d", "D")])
print(df3)

# --- Custom labels ----------------------------------------------------------
# Set the column labels.
df4 = pd.DataFrame(
    [["a", "A"], ["b", "B"], ["c", "C"]],
    columns=["小写", "大写"],
)
print(df4)

# Set the row labels.
df5 = pd.DataFrame(
    [["a", "A"], ["b", "B"], ["c", "C"]],
    index=["一", "二", "三"],
)
print(df5)

# --- DataFrame from a dict: column name -> column values --------------------
data = {"小写": ["a", "b", "c", "d"], "大写": ["A", "B", "C", "D"]}
df6 = pd.DataFrame(data)
print(df6)

# Same data with explicit row labels.
df7 = pd.DataFrame(data, index=["(1)", "(2)", "(3)", "(4)"])
print(df7)

# Column labels.
print(df7.columns)
# Row labels.
print(df7.index)
import pandas as pd

# Walkthrough of reading an Excel workbook and of basic cleaning steps:
# inspecting the frame, handling missing values and removing duplicates.

# Workbook used by every example below; defined once so the path cannot
# drift between calls.
EXCEL_PATH = r"C:\Users\clarkhu\Desktop\py\9月数据.xls"

# --- Reading ----------------------------------------------------------------
# With no sheet_name given, the first sheet is read.
df = pd.read_excel(EXCEL_PATH)
print(df)

# sheet_name accepts either a sheet name or a sheet position.
# With index_col set, that column becomes the row index.
df = pd.read_excel(EXCEL_PATH, sheet_name=0, index_col=2)
print(df)

df = pd.read_excel(EXCEL_PATH)
print(df)

df = pd.read_excel(EXCEL_PATH, sheet_name="10月")
print(df)

# --- Inspecting -------------------------------------------------------------
# Show the first rows (5 by default). The result has to be printed; merely
# assigning it displays nothing.
print(df.head())

# Number of rows and columns.
print(df.shape)

# Per-column dtype and non-null counts (info() prints by itself).
df.info()

# --- Missing values ---------------------------------------------------------
# Boolean frame of the same shape: True where a value is missing.
s = df.isnull()
print(s)
print(s.shape)

# Drop every row that contains at least one NaN.
g1 = df.dropna()
print(g1)

# Drop only the rows whose "性别" column is NaN.
g2 = df.dropna(subset=["性别"])
print(g2)

# Drop only the rows in which every value is NaN.
g3 = df.dropna(how="all")
print(g3)

# Fill the remaining gaps with a per-column default.
print('==========g4============')
g4 = g3.fillna({"性别": "男", "手机号": "123456789"})
print(g4)

# --- Duplicates -------------------------------------------------------------
# drop_duplicates returns a new frame and leaves df untouched, so each result
# is printed rather than discarded.
print('xxxxxxxxxxxxxxx')

# Rows identical in every column.
print(df.drop_duplicates())

# Rows sharing the same value in a single column. The column must exist in
# the sheet; a placeholder name raises KeyError.
print(df.drop_duplicates(subset="昵称"))

# keep="first" (default) keeps the first occurrence, keep="last" the last,
# keep=False drops every row that has a duplicate.
print(df.drop_duplicates(subset=["昵称", "性别"], keep="last"))
print(df.drop_duplicates(subset=["昵称", "性别"], keep=False))