文件夾中文件數(shù)較多,每份文件較大的情況下,可以采用多進(jìn)程讀取文件
最后附完整項(xiàng)目代碼
# Single-process read of one file in the folder.
def read_data(path):
    """Load one pickled file and return its contents, printing elapsed time.

    Parameters
    ----------
    path : str
        Filesystem path of the pickle file to read.

    Returns
    -------
    The object stored in the pickle file.

    NOTE(review): pickle.load can execute arbitrary code when fed a
    malicious file -- only use this on trusted data.
    """
    start = time.time()
    with open(path, 'rb') as f:
        # Renamed from 'filename': this variable holds the unpickled
        # DATA, not a file name.
        data = pickle.load(f)
    end = time.time()
    print('Task runs %0.2f seconds.' % ((end - start)))
    return data
# Insert data into the database (truncate-then-load).
def insert_data(db_connect, result, table):
    """Truncate *table* and bulk-insert the rows of *result* through *db_connect*.

    Parameters
    ----------
    db_connect : DB-API 2.0 connection
        Must provide cursor(), commit() and rollback().
    result : 2-D array-like
        Rows to insert; each row must match the column list of the INSERT
        statement (13 columns here).
    table : str
        Target table name. Interpolated into the SQL text, so it must be a
        trusted identifier -- never user input.
    """
    cursor = db_connect.cursor()
    # Convert to a list of tuples, the row format executemany() expects.
    rows = list(map(tuple, np.array(result).tolist()))
    sql_truncate = "truncate {};".format(table)
    sql_insert = '''
insert into {}
(columns_name
) values
(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
'''.format(table)
    try:
        # Execute the SQL statements.
        cursor.execute(sql_truncate)
        cursor.executemany(sql_insert, rows)
        # BUG FIX: commit() belongs to the CONNECTION in DB-API 2.0;
        # cursor.commit() raised AttributeError, which the old bare
        # `except:` silently swallowed on every call.
        db_connect.commit()
        print("Done Task!")
    except Exception as exc:
        # Roll back on error (BUG FIX: rollback() is also a connection
        # method, not a cursor method). Report instead of swallowing.
        db_connect.rollback()
        print("Insert failed, rolled back: %s" % exc)
    finally:
        # Always release the cursor, success or failure.
        cursor.close()
if __name__ == '__main__':
    # Spawn worker processes -- keep the count in line with the number of
    # logical cores.
    # BUG FIX: original did `connect_db = connect_db()`, shadowing the
    # factory function with the connection object it returned.
    db_conn = connect_db()
    filepath = r'D:\filename'
    table = 'table_name'
    t1 = time.time()
    pro_num = 10  # number of worker processes
    pool = Pool(processes=pro_num)
    job_result = []
    # Walk the folder and schedule every file for asynchronous reading.
    for file in os.listdir(filepath):
        # os.path.join instead of manual '\\' concatenation (portable,
        # and immune to trailing-separator mistakes).
        filename = os.path.join(filepath, file)
        res = pool.apply_async(read_data, (filename,))
        job_result.append(res)
    pool.close()  # stop accepting new tasks
    pool.join()   # wait for all workers to finish
    # Merge all loaded files into a single DataFrame.
    # BUG FIX: DataFrame.append() was deprecated in pandas 1.4 and removed
    # in 2.0 -- and was O(n^2) anyway. Collect then concat once.
    frames = [tmp.get() for tmp in job_result]
    get_result = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    t2 = time.time()
    insert_data(db_conn, get_result, table)
    print('It took a total of %0.2f seconds.' % (t2 - t1))
完整項(xiàng)目代碼鏈接:https://github.com/AlisaAlbert/TransferData/blob/master/InsertData.py
更多文章、技術(shù)交流、商務(wù)合作、聯(lián)系博主
微信掃碼或搜索:z360901061

微信掃一掃加我為好友
QQ號(hào)聯(lián)系: 360901061
您的支持是博主寫作最大的動(dòng)力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點(diǎn)擊下面給點(diǎn)支持吧,站長非常感激您!手機(jī)微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然后點(diǎn)擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元
