# 1.安装 pandas处理excel需要xlrd、openpyxl依赖包
1
2
3
pip install pandas
pip install xlrd
pip install openpyxl

2.pandas获取csv\Excel的第一行(列标签)#

一般来说excel第一行是列标签,平时读取第一行也是从数据行来读取的,那么如何读取第一行列标签呢

1
2
3
4
>>> import pandas as pd
>>> path="I:\\download\\临时\\test.xlsx"
>>> data=pd.read_excel(path)
>>> list(data.columns.values)

这样就能拿到所有的列标签

3.获取excel的最大行和最大列#

==注意:这里获取到的行是不包括第一行(标签名)==
image

1
2
3
4
5
6
7
8
9
10
11
12
>>> import pandas as pd
>>> path="I:\\download\\临时\\test.xlsx"
>>> data=pd.read_excel(path)
#获取最大行,不包括标签
>>> nrows=data.shape[0]
5
#获取最大列
>>> ncols=data.columns.size
100

print('Max Rows:'+str(nrows))
print('Max Columns'+str(ncols))

4.显示列名及对应序号#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
#!/usr/bin/evn python
#-*-coding:utf-8-*-

import pandas as pd

path="I:\\download\\临时\\test.xlsx"

data=pd.read_excel(path)
nrows=data.shape[0]
ncols=data.columns.size

#显示列名,并显示列名序号
for iCol in range(ncols):
print(str(iCol)+":"+data.columns[iCol])

运行结果:

1
2
3
4
5
6
0:Chr
1:Start
2:Stop
3:Ref
4:Call
5:MapLoc

5.打印指定单元格#

image
打印第二行,第四列

1
2
3
4
5
6
7
8
9
10
11
12
13
#!/usr/bin/evn python
#-*-coding:utf-8-*-

import pandas as pd

path="I:\\download\\临时\\test.xlsx"

data=pd.read_excel(path)
nrows=data.shape[0]
ncols=data.columns.size

#打印指定单元格
print(data.iloc[0,3])

运行结果:

1
C  #打印正确

6.根据列名查看列的内容#

image

1
2
3
4
5
6
7
8
9
#!/usr/bin/evn python
#-*-coding:utf-8-*-

import pandas as pd

path="I:\\download\\临时\\test.xlsx"
data=pd.read_excel(path)
#查看指定列的内容
print(data['Ref'])

运行结果:

1
2
3
4
5
6
0    C
1 T
2 G
3 G
4 G
Name: Ref, dtype: object

7.根据列的序号查看指定列内容#

image
方法一

1
2
3
4
5
6
7
8
9
#!/usr/bin/evn python
#-*-coding:utf-8-*-
import pandas as pd

path="I:\\download\\临时\\test.xlsx"
data=pd.read_excel(path)
#根据列的序号查看指定列:第四列的内容
sColumnName=data.columns[3]
print(data[sColumnName])

运行结果:

1
2
3
4
5
6
0    C
1 T
2 G
3 G
4 G
Name: Ref, dtype: object

方法二

1
2
3
4
5
6
7
8
#!/usr/bin/evn python
#-*-coding:utf-8-*-
import pandas as pd

path="I:\\download\\临时\\test.xlsx"
data=pd.read_excel(path,sheet_name="SmallVariations")
#查看某一列的值
print(data['Ref'].values)

这种方法更为简便,注意读取excel时如果要用这种方法,就不能添加header=None,不然会报错,运行结果:

1
['C' 'T' 'G' 'G' 'G']

8.查看固定行的内容#

image
方法一

1
2
3
4
5
6
7
8
9
10
11
12
#!/usr/bin/evn python
#-*-coding:utf-8-*-
import pandas as pd

path="I:\\download\\临时\\test.xlsx"
data=pd.read_excel(path)
nrows=data.shape[0]
ncols=data.columns.size
#查看某行的内容(第四行,行从0开始)
iRow=3
for iCol in range(ncols):
print(data.iloc[iRow,iCol])

运行结果:

1
2
3
4
5
6
7
8
9
10
11
1
11190610
11190611
G
A
1p36.22
snv
SomMajor
NDPLow
...
..

方法二

1
2
3
4
5
6
7
8
#!/usr/bin/evn python
#-*-coding:utf-8-*-
import pandas as pd

path="I:\\download\\临时\\test.xlsx"
data=pd.read_excel(path,sheet_name="SmallVariations",header=None)
# 查看第一行
print(data.values[0])

这种方法更为简便,运行结果:

1
2
3
['Chr' 'Start' 'Stop' 'Ref' 'Call' 'MapLoc' 'VarType' 'VarTag'
'siteFilter' 'SomConfLev' 'SomNotes' 'caseRsAD' 'caseAD' 'caseAF'
'caseDP'..]

9.遍历整个表格#

方法一

1
2
3
4
5
6
7
8
9
10
11
12
#!/usr/bin/evn python
#-*-coding:utf-8-*-
import pandas as pd

path="I:\\download\\临时\\test.xlsx"
data=pd.read_excel(path)
nrows=data.shape[0]
ncols=data.columns.size
#遍历整个表格
for iRow in range(nrows):
for iCol in range(ncols):
print(data.iloc[iRow,iCol])

方法二

1
2
3
4
5
6
7
8
#!/usr/bin/evn python
#-*-coding:utf-8-*-
import pandas as pd

path="I:\\download\\临时\\test.xlsx"
data=pd.read_excel(path,sheet_name="SmallVariations",header=None)
# 查看所有的值
print(data.values)

这种方法更为直观、简便,运行结果:

1
2
3
[['Chr' 'Start' 'Stop' 'Ref' 'Call' 'MapLoc' 'VarType' 'VarTag'
'siteFilter' 'SomConfLev' 'SomNotes' 'caseRsAD' 'caseAD' 'caseAF'
'caseDP' ..]]

10.读取excel时带列名读取#

读取时增加参数:header=None

1
2
3
4
5
6
7
8
#!/usr/bin/evn python
#-*-coding:utf-8-*-
import pandas as pd

path="I:\\download\\临时\\test.xlsx"
data=pd.read_excel(path,sheet_name="SmallVariations",header=None)
nrows=data.shape[0]
ncols=data.columns.size

运行结果:

1
2
Max Rows:6   //比上面的读取多了一行
Max Columns:100

11.操作excel插入列#

image

1
2
3
4
5
6
7
8
9
10
11
#!/usr/bin/evn python
#-*-coding:utf-8-*-
import pandas as pd

path="I:\\download\\临时\\test2.xlsx"
data=pd.read_excel(path,sheet_name="shtt1")

# 新增列
tmp=data['Stop'].values
data.insert(6,'Stop2',tmp)
pd.DataFrame(data).to_excel("I:\\download\\临时\\test2.xlsx",sheet_name="shtt1",index=False,header=True)

操作结果:
image

11.1 插入行列操作#

image

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#!/usr/bin/evn python
#-*-coding:utf-8-*-
import pandas as pd

path="I:\\download\\临时\\test2.xlsx"
data=pd.read_excel(path,sheet_name="sheet1")
newrow=['孙悟空','沙僧','八戒','小白龙']
#增加行数据,在第6行增加
data.loc[6]=newrow

#增加列,给定默认值None
data['成绩']=None

#列变动后再插入行
newrow2=['孙悟空','沙僧','八戒','小白龙','90']
data.loc[7]=newrow2
pd.DataFrame(data).to_excel("I:\\download\\临时\\test2.xlsx",sheet_name="sheet1",index=False,header=True)

执行结果:
image

11.2 插入时左边多一列的问题#

image
使用如下代码编辑后,结果如下:

1
2
data = pd.read_excel(path)
pd.DataFrame(data).to_excel(path, sheet_name="Sheet1", header=True)

执行结果后结果如下:
image
解决方法:
在进行to_excel操作的时候加上参数index=False,如下:

1
2
3


pd.DataFrame(data).to_excel(path, sheet_name="Sheet1",index=False, header=True)

image

11.3设置列宽#

原始的表格如果列宽不够的话,会隐藏显示,如下图:
image
那么如何设置列宽度呢,参见如下代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#创建一个空的Excel
path="I:\\download\\zz.xlsx"

data = pd.read_excel(path)
df = pd.DataFrame(data)

# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter(path, engine='xlsxwriter')

# Convert the dataframe to an XlsxWriter Excel object.这里注意index要设置为false,不然结果会多一列数据
df.to_excel(writer, sheet_name='Sheet1', index=False)

# Get the xlsxwriter objects from the dataframe writer object.
workbook = writer.book
worksheet = writer.sheets['Sheet1']

# Add some cell formats.
format1 = workbook.add_format({"font_name": u"微软雅黑"})

# Set the column width and format.
worksheet.set_column('A:B', 18, format1)
worksheet.set_column('C:H', 21, format1)
df.to_excel(path, sheet_name="Sheet1", index=False, header=True)
writer.save()

image

11.3.1 实际使用的一个程序例子#

从数据库中用以下SQL将查询出来的数据写入到EXCEL中,并设置列宽

1
2
3
4
5
6
7
8
select trim(BOTH '"' FROM json_extract(esi.fdynamic_attribute,'$.f31')) as 'RUN号',qc.fsample_code as '样本编号',
ai.fsubmit_time as '提交时间',ai.fstart_time as '分析开始时间',ai.ffinish_time as '分析结束时间',ai.felapsed_time as '运行耗时',
trim(BOTH '"' FROM json_extract(rinfo.fdynamic_attribute,'$.f50')) as '审核时间',trim(BOTH '"' FROM json_extract(rinfo.fdynamic_attribute,'$.f72')) as '报告时间' from a_info ai
LEFT JOIN e_sequence_detail esd on (ai.fsequence_detail_id=esd.fid)
LEFT JOIN e_sequence_info esi on (esi.fid=esd.fsequence_id)
LEFT JOIN qc_info qc on (ai.fid=qc.finfo_id)
LEFT JOIN r_info rinfo on (rinfo.fqc_id=qc.fid)
where trim(BOTH '"' FROM json_extract(esi.fdynamic_attribute,'$.f31')) ='F300000337';

image
编写的Python程序:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/evn python
#-*-coding:utf-8-*-
import pandas as pd
import pymysql


class Export(object):
def __init__(self):
self.data = pd.read_excel("I:\\download\\zz.xlsx", sheet_name="Sheet1")
self.data['RUN号'] = None
self.data['样本编号'] = None
self.data['提交时间'] = None
self.data['分析开始时间'] = None
self.data['分析结束时间'] = None
self.data['运行耗时'] = None
self.data['审核时间'] = None
self.data['报告时间'] = None

def connect(self):
db =pymysql.connect('127.0.0.1' ,'root' ,'123456' ,'oncoosplus_zhongzhong' ,use_unicode=True, charset='utf8')
cursor =db.cursor()
return cursor ,db

def getdata(self,runcode):
cursor,db = self.connect()
sql='''
select trim(BOTH '"' FROM json_extract(esi.fdynamic_attribute,'$.f31')) as 'RUN号',qc.fsample_code as '样本编号', ai.fsubmit_time as '提交时间',ai.fstart_time as '分析开始时间',ai.ffinish_time as '分析结束时间',ai.felapsed_time as '运行耗时', trim(BOTH '"' FROM json_extract(rinfo.fdynamic_attribute,'$.f50')) as '审核时间',trim(BOTH '"' FROM json_extract(rinfo.fdynamic_attribute,'$.f72')) as '报告时间' from a_info ai LEFT JOIN e_sequence_detail esd on (ai.fsequence_detail_id=esd.fid) LEFT JOIN e_sequence_info esi on (esi.fid=esd.fsequence_id) LEFT JOIN qc_info qc on (ai.fid=qc.finfo_id) LEFT JOIN r_info rinfo on (rinfo.fqc_id=qc.fid) where trim(BOTH '"' FROM json_extract(esi.fdynamic_attribute,'$.f31')) =
'''
cursor.execute(sql+"'"+runcode+"';")
# print(sql+runcode+"';")
# print(cursor.fetchall())
for i,item in enumerate(cursor.fetchall()):
self.data.loc[i+1] = item ##插入行数据
df = pd.DataFrame(self.data) #.to_excel("I:\\download\\zz.xlsx", sheet_name="Sheet1", index=False, header=True)
self.setFormat(df)

def setFormat(self,df):
# df = pd.DataFrame(self.data)

# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('I:\\download\\zz.xlsx', engine='xlsxwriter')

# Convert the dataframe to an XlsxWriter Excel object.
df.to_excel(writer, sheet_name='Sheet1',index=False)

# Get the xlsxwriter objects from the dataframe writer object.
workbook = writer.book
worksheet = writer.sheets['Sheet1']

# Add some cell formats.
format1 = workbook.add_format({"font_name": u"微软雅黑"})

# Set the column width and format.
worksheet.set_column('A:B', 18, format1)
worksheet.set_column('C:H', 20, format1)
df.to_excel("I:\\download\\zz.xlsx", sheet_name="Sheet1", index=False, header=True)
writer.save()

if __name__ == '__main__':
#创建一个空的Excel
path="I:\\download\\zz.xlsx"
df = pd.DataFrame()
df.to_excel("I:\\download\\zz.xlsx")

export=Export()
export.getdata("F300000337")

执行结果:
image

pandas官方文档
pandas中文文档
获取列标签参考博客
pandas基础操作参考博客
excel插入列参考博客
excel插入列保存参考博客
操作列参考博客
左边多一列参考博客
设置列宽参考官方文档
设置列宽参考博客