1
ORACLE_HOME
/
home
/
oracle
/
client_1
2
ORACLE_DSN dbi:Oracle:host
=
192.168
.
11.1
;sid
=
orcl
3
ORACLE_USER manager
4
ORACLE_PWD tiger
5
SCHEMA
test
6
TYPE
TABLE
VIEW
PACKAGE COPY
7
PG_NUMERIC_TYPE
0
8
PG_INTEGER_TYPE
1
9
DEFAULT_NUMERIC
float
10
SKIP fkeys pkeys ukeys indexes checks
11
NLS_LANG AMERICAN_AMERICA.UTF8
12
PG_DSN dbi:Pg:dbname
=
easyetl;host
=
127.0
.
0.1
;port
=
5432
13
PG_USER easyetl
14
PG_PWD password
15
OUTPUT output.sql
1-4 配置源端Oracle的信息
5??? oracle的schema取值
6??? 準備轉化的數據類型,也包括導數據的copy命令
7-9? 用來轉化oracle的number(p,s)到PG的類型:
7表示是否使用PG內部的數據類型,0表示不使用,1表示使用
8表示在7設置為0時,如果8設置為1,則類型number(p)的定義變更為integer;如果8設置為0,則number(p)也轉化為numeric(p)
9表示是8設置為1的時候,類型number轉化為float,如果8設置為0,則9不起作用.
簡單的設置,如果7,8均設置為0,那么number(p) --> numeric(p),number(p,s) --> numeric(p,s), number --> numeric
10 約束們是否需要創建
11 語言選擇
12-14 配置目的端PG(GP亦可),如果這三行信息不配置,也沒關系,可以生成oracle轉化為PG的腳本
15 生成文件
遷移中出現的情況:
(1) 表可以完全遷移過去
(2) 視圖里面如果沒有起別名的話,也需要手動添加別名
(3) package需要手動修改.注:ver13版本的package生成需要把perform/decode屏蔽掉,因為這二點未做好,模塊為PLSQL.pm.
當然package轉化不僅僅只是這部分東西,主要的有:
a 別名需要顯式寫出
b 隱式轉化要顯式寫出
c 函數的差異(GP官方有一套Oracle的函數實現,基本上夠用)
d oracle里面非標準寫法,如: a left join b寫成 a,b where a.xx=b.xx(+)
?
2. Oracle的數據遷移到GP的實現過程
#
!/bin/bash
if
[ $
#
-lt 3 ];then
echo
'
Usage `basename $0` pipe tablename control
'
exit
1
fi
pipename
=$
1
tablename
=$
2
control
=$
3
condition
=$
4
mknod
$pipename
p
/root/software/sqluldr2 user=manager/tigerd
@orcl
query=
"
select * from $tablename where $condition
"
field=
0x7c
file=
$pipename
charset=utf8 text=CSV safe=yes persql=
"
begin dbms_lock.sleep(2); end;
"
&
gpload
-f
$control
-l gpload.
log
rm
-rf
$pipename
ora2gp.sh --生成control文件,包括管道文件名稱.然后調用上述進程實現載入過程.
#
!/usr/bin/env python
#
-*- coding:utf-8 -*-
import
yaml
import
subprocess
import
sys
import
os
#
Script starts from here paramnum=
len(sys.argv) datadt=20140820
condition=
"
1=1
"
tplpath=
"
/root/template/
"
pipepath=
"
/tmp/pipe
"
batname=
"
/root/script/dataload.sh
"
if (paramnum == 1
):
print
'
Usage:
'+ sys.argv[0]+
'
tablename
'
sys.exit()
elif(paramnum == 2
): tablename=sys.argv[1
]
elif(paramnum == 3
): tablename=sys.argv[1
] datadt=sys.argv[2
]
elif(paramnum == 4
): tablename=sys.argv[1
] datadt=sys.argv[2
] condition=sys.argv[3
]
else
:
print
'
Usage:
'+ sys.argv[0]+
'
tablename datadt condition. (datadt condition is optional)!
'
sys.exit() pid=
os.getpid() pipename=pipepath+
str(pid) f = open(tplpath+
"
gp_template_load.ctl
"
) dataMap =
yaml.load(f) f.close() dataMap[
'
GPLOAD
'][
'
INPUT
'][0][
'
SOURCE
'][
'
FILE
'][0]=
pipename dataMap[
'
GPLOAD
'][
'
OUTPUT
'][0][
'
TABLE
']=
tablename dataMap[
'
GPLOAD
'][
'
INPUT
'][6][
'
ERROR_TABLE
']=tablename+
'
_err
'
filename=tplpath+tablename+
'
.ctl
'
f = open(filename,
'
w
'
) yaml.dump(dataMap,f) f.close() handle=
subprocess.Popen([batname,pipename,tablename,filename,condition]) handle.communicate()
VERSION: 1.0.0.1
DATABASE: dw
USER: manager
HOST: gp
PORT: 5432
GPLOAD:
INPUT:
- SOURCE:
LOCAL_HOSTNAME:
- gp
FILE:
- /tmp/mypipe
PORT_RANGE: [8001,9000]
- FORMAT: csv
- DELIMITER: ','
- QUOTE: '"'
- HEADER: true
- ERROR_LIMIT: 10000
- ERROR_TABLE: tablename_err
OUTPUT:
- TABLE: tablename
- MODE: INSERT
PRELOAD:
- TRUNCATE: true
后續操作:
上面的程序可以當作同步使用,但是真正的在生產使用就會有點不太讓人放心.
原因有三:
(1)dataload.sh里面的sqluldr是放在后臺處理的.當sqluldr出現異常,gpload可能會等待.當gpload出現異常的時候,sqluldr還是會載出文件.而且dataload.sh是fork出二個進程,當進程
出現異常,還需要手動尋找,kill掉.
(2)日常記錄與處理.
(3)oracle與gp的表結構要嚴格一致才行.
基于此,寫了可以統一處理fork的進程,增加了獲取gp column list,加上日志處理這幾部分.
oraconf文件格式:
#CONFNAME:USER^PASS^TNSNAME
gpconf文件格式:
#host:port:database:user:passwd
control文件看上面以及官方文檔吧.
#!/bin/
sh
. greenplum_loaders_path.
sh
. setenv
if
[ $# -lt
4
];
then
echo
"
Usage : `basename $0` confname etl_date mode src_tbname tgt_tbname
"
echo
"
confname : configuration at ${PWD}/conf/oraconf
"
echo
"
etl_date : YYYYMMDD
"
echo
"
mode : 1 truncate; 2 append
"
echo
"
src_tbname : oracle datasource tablename
"
echo
"
tgt_tbname(optional) : greenplum datasource tablename
"
exit
1
fi
#trap the exception quit
trap
'
log_info "TERM/INTERRUPT(subprocess) close";close_subproc
'
INT TERM
declare
-
a sublist
function
log_info()
{
DATETIME
=`
date
+
"
%Y%m%d %H:%M:%S
"
`
echo
-e
"
S $DATETIME P[$$]: $*
"
|
tee
-a
"
$LOGPATH
"
/
"
$ETLDATE
"
/
"
$GPTABLE
"
.log
}
function
collect_subproc()
{
local index
if
[ ${#sublist} -eq
0
];
then
index
=
0
else
index
=$[${#sublist}]+
1
fi
sublist[$index]
=$
1
}
function
close_subproc()
{
for
subid
in
${sublist[@]}
do
log_info
"
kill processid: $subid
"
kill
$subid
done
}
function
parse_yaml()
{
local
file
=$
1
local tablename
=$
2
local pipename
=$
3
local etldate
=$
4
sed
-i -e
"
s/mypipe/
"
$pipename
"
/
"
-e
"
s/tablename_err/public.
"
$tablename
"
_err/
"
-e
"
s/\<tablename\>/
"
$tablename
"
/
"
-e
"
s/etl_date/
"
$etldate
"
/
"
$
file
}
if
[ $(
dirname
$
0
) ==
'
.
'
];
then
PRIPATH
=
${PWD}
else
PRIPATH
=$(
dirname
$
0
)
fi
TPLPATH
=
"
$PRIPATH
"
/
template
LOGPATH
=
"
$PRIPATH
"
/
log
CONFNAME
=$
1
ETLDATE
=$
2
MODE
=$
3
ORATABLE
=$
4
GPTABLE
=$
5
[
-z
"
$GPTABLE
"
] && GPTABLE=
"
$ORATABLE
"
[
! -d
"
$LOGPATH
"
/
"
$ETLDATE
"
] &&
mkdir
-p
"
$LOGPATH
"
/
"
$ETLDATE
"
PIPENAME
=
"
P
"
$$
"
$GPTABLE
"
eval `
grep
"
^$CONFNAME
"
"
$PRIPATH
"
/conf/oraconf |
awk
-F
'
:
'
'
{print $2}
'
|
awk
-F
'
^
'
'
{print "ORACLE_USER="$1";ORACLE_PASS="$2";ORACLE_SID="$3}
'
`
eval $(eval `
grep
^[^#]
"
$PRIPATH
"
/conf/gpconf |
awk
-F
'
:
'
-v table=$GPTABLE
'
{printf("psql -h %s -p %d -U %s %s -tAc \047\\\d %s \047",$1,$2,$4,$3,table)}
'
`|
awk
-F
"
|
"
'
{cmd=cmd$1","}END{print "collist="cmd}
'
)
collist
=`
echo
$collist|
sed
"
s/,$//g
"
`
echo
>>
"
$LOGPATH
"
/
"
$ETLDATE
"
/
"
$GPTABLE
"
.log
#create and modify template
for
gpload use
log_info
"
create template
"
$LOGPATH
"
/
"
$ETLDATE
"
/
"
$GPTABLE
"
.ctl.
"
cp
"
$TPLPATH
"
/gp_template_load_
"
$MODE
"
.ctl
"
$LOGPATH
"
/
"
$ETLDATE
"
/
"
$GPTABLE
"
.ctl
if
[ $? -ne
0
];
then
log_info
"
create template
"
$LOGPATH
"
/
"
$ETLDATE
"
/
"
$GPTABLE
"
.ctl failed.
"
exit
2
fi
parse_yaml
"
$LOGPATH
"
/
"
$ETLDATE
"
/
"
$GPTABLE
"
.ctl $GPTABLE $PIPENAME $ETLDATE
if
[ $? -ne
0
];
then
log_info
"
modify template
"
$LOGPATH
"
/
"
$ETLDATE
"
/
"
$GPTABLE
"
.ctl failed.
"
exit
2
fi
#create pipename
log_info
"
create pipe /tmp/
"
$PIPENAME
"
.
"
mknod
/tmp/
"
$PIPENAME
"
p
if
[ $? -ne
0
];
then
log_info
"
create pipe failed!
"
exit
3
fi
gpload
-f
"
$LOGPATH
"
/
"
$ETLDATE
"
/
"
$GPTABLE
"
.ctl -l
"
$LOGPATH
"
/
"
$ETLDATE
"
/
"
$GPTABLE
"
.log &
collect_subproc $
!
log_info
"
unload sql:select $collist from $ORATABLE
"
sqluldr2 user
=
"
$ORACLE_USER
"
/
"
$ORACLE_PASS
"@"
$ORACLE_SID
"
query=
"
select $collist from $ORATABLE
"
head
=Yes field=
0x7c
file
=/tmp/
"
$PIPENAME
"
charset=gb18030 text=CSV safe=yes presql=
"
begin dbms_lock.sleep(5); end;
"
log=+
"
$LOGPATH
"
/
"
$ETLDATE
"
/
"
$GPTABLE
"
.log &
collect_subproc $
!
wait
if
[ $? -ne
0
];
then
log_info
"
$GPTABLE load failed!
"
else
log_info
"
$GPTABLE load succ!
"
fi
log_info
"
rm -rf /tmp/
"
$PIPENAME
""
rm
-rf /tmp/
"
$PIPENAME
"
if
[ $? -ne
0
];
then
log_info
"
rm /tmp/
"
$PIPENAME
"
failed.
"
exit
4
fi
?
更多文章、技術交流、商務合作、聯系博主
微信掃碼或搜索:z360901061
微信掃一掃加我為好友
QQ號聯系: 360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然后點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元

