工作常用函数总结

####将结果存excel

1
2
3
writer=pd.ExcelWriter(r'C://user/desktop')
data.to_excel(writer,sheet_name='heet1')
writer.save()

从excel读取数据

1
pd.read_excel(r'C:\users\adminstrator\desktop',sheet_name='Sheet1')

####从mysql读取数据

1
2
3
4
5
conn=pymysql.connect(host='localhost',user='root',password='123456',databse='comp,port=3306)
sql='SELECT * FROM kl'
data = pd.read_sql(sql,conn)
conn.close()
datasss

将DataFrame存入mysql

1
2
3
4
from sqlalchemy import create_engine
engine = create_engine('mysql+pymysql://root:123456@localhost:3306/work?charset=UTF8mb4')
pd.io.sql.to_sql(df2, 'industry_1', engine, index=False, if_exists='append', chunksize=10000)
engine.dispose()

一条一条存入mysql

1
2
3
4
5
6
7
8
db = pymysql.connect(host='localhost',port=3306,user='root',database='ceshi',password='123456',charset='utf8mb4')
cursor = db.cursor()
sql = 'insert into kll (name,sex,age) values(%s,%s,%s)'
data = ('张三','男',23)
cursor.execute(sql,data)
db.commit()
cursor.close()
db.close()

批量插入mysql

1
2
3
4
5
6
7
8
db = pymysql.connect(host='localhost',port=3306,user='root',database='ceshi',password='123456',charset='utf8mb4')
cursor = db.cursor()
sql = 'insert into kl (name,sex,age) values(%s,%s,%s)'
data = [('张三','男',23),('李四','男',32),('小美','女',18)]
cursor.executemany(sql,data)
db.commit()
cursor.close()
db.close()

进程池代码

1
2
3
4
5
6
7
8
pool = Pool(30)
n = 10000
count = (len(company_name) // n) + 1
for num in range(count):
company_list = company_name[num * n:(num + 1) * n]
pool.apply_async(func, args=(company_list,))
pool.close()
pool.join()

将一对多转换成多对多

1
2
3
4
con2 = pymysql.connect(host="192.168.0.222", port=3306, user="user_r", password='1q2w3e4r', db='industry_cn_test',charset="utf8")
sql_zong = "select `names` as jiqun_name ,ipc_5_list from ipc_split_10000_c_list_v1 "
res = pd.read_sql(sql_zong, con2)
ipc_group_unique = res.drop('ipc_5_list', axis=1).join(res['ipc_5_list'].str.split(',', expand=True).stack().reset_index(level=1, drop=True).rename('ipcr'))

####将df中某两列互换

1
2
3
4
5
6
df = pd.DataFrame([[5,6],[4,3],[7,8]])
df.columns = ['one','two']
df['three'] = df['one']
print(df)
df.loc[df.one>df.two,'three'] = df.loc[df.one>df.two,'two']
df

es返回不符合条件的个数

1
2
3
4
5
6
7
8
9
10
11
es.search(index=['patent_cn_v71'],body={
'query':{
'bool':{
'must_not':{
'exists':{
'fields':'app_text'
}
}
}
}
})

连接250数据(读)

1
conn1 = pymysql.connect(host='192.168.0.250',user='user_r',password='1q2w3e4r',database='pre_formal_2')

####连接250高权限

1
conn1=pymysql.connect(host='192.168.0.250',user='user_rwd_2',password='5d6f7g8h',database='pre_formal_2')