相似度函數
概述
? ? 比較兩個字段的相似度
?? 最近有人問到關于兩個字段求相似度的函數,所以就寫了一篇關于相似度的函數,分別是“簡單的模糊匹配”,“順序匹配”,“一對一位置匹配”。在平時的這種函數可能會需要用到,可能業務需求不一樣,這里只給出參照,實際情況可以相對修改。
本文所有的兩個字段比較都是除以比較字段本身,例如A與B比較,找出的長度除以A的長度,因為考慮如果A的長度大于B的長度,相似度會超100%,例如‘abbc’,'ab'.
如果大家想除以B的長度,只需要在語句末尾將‘ SET @num=@num*1.0/LEN(@Cloumna) ’修改成‘ SET @num=@num*1.0/LEN(@Cloumnb) ’
步驟
? ?1.兩個字段簡單相似
?
--
-兩個字段簡單相似
CREATE
FUNCTION
DBO.FN_Resemble
(
@Cloumna
NVARCHAR
(
MAX
),
@Cloumnb
NVARCHAR
(
MAX
)
)
RETURNS
FLOAT
AS
BEGIN
DECLARE
@num
FLOAT
,
@len
int
SET
@Cloumna
=
ISNULL
(
@Cloumna
,
0
)
SET
@Cloumnb
=
ISNULL
(
@Cloumnb
,
0
)
SET
@len
=
1
SET
@num
=
0
WHILE
(
LEN
(
@Cloumna
)
<>
0
AND
LEN
(
@CloumnB
)
<>
0
)
BEGIN
WHILE
(
@len
<=
LEN
(
@Cloumna
))
BEGIN
DECLARE
@a
NVARCHAR
(
4
)
SET
@a
=
''
SET
@a
=
SUBSTRING
(
@Cloumna
,
@len
,
1
)
IF
(
CHARINDEX
(
@a
,
@CloumnB
)
>
0
)
BEGIN
SET
@num
=
@num
+
1
END
SET
@len
=
@len
+
1
END
SET
@num
=
@num
*
1.0
/
LEN
(
@Cloumna
)
BREAK
END
RETURN
@num
END
--
--測試代碼
SELECT
DBO.FN_Resemble(
'
ABDC321G
'
,
'
ABDC123G
'
)
?
2.兩個字段順序相似
--
-兩個字段順序相似
CREATE
FUNCTION
DBO.FN_Resemble_order
(
@Cloumna
NVARCHAR
(
MAX
),
@Cloumnb
NVARCHAR
(
MAX
)
)
RETURNS
FLOAT
AS
BEGIN
DECLARE
@num
FLOAT
,
@len
int
SET
@Cloumna
=
ISNULL
(
@Cloumna
,
0
)
SET
@Cloumnb
=
ISNULL
(
@Cloumnb
,
0
)
SET
@len
=
1
SET
@num
=
0
WHILE
(
LEN
(
@Cloumna
)
<>
0
AND
LEN
(
@CloumnB
)
<>
0
)
BEGIN
DECLARE
@a
NVARCHAR
(
4
)
DECLARE
@b
NVARCHAR
(
4
)
IF
(
LEN
(
@Cloumna
)
>=
LEN
(
@CloumnB
))
BEGIN
WHILE
(
@len
<=
LEN
(
@CloumnB
))
BEGIN
SET
@a
=
''
SET
@a
=
SUBSTRING
(
@Cloumna
,
@len
,
1
)
SET
@b
=
''
SET
@b
=
SUBSTRING
(
@CloumnB
,
@len
,
1
)
IF
(
@a
=
@b
)
BEGIN
SET
@num
=
@num
+
1
END
ELSE
BEGIN
break
END
SET
@len
=
@len
+
1
END
END
ELSE
IF
(
LEN
(
@Cloumna
)
<
LEN
(
@CloumnB
))
BEGIN
WHILE
(
@len
<=
LEN
(
@Cloumna
))
BEGIN
SET
@a
=
''
SET
@a
=
SUBSTRING
(
@Cloumna
,
@len
,
1
)
SET
@b
=
''
SET
@b
=
SUBSTRING
(
@CloumnB
,
@len
,
1
)
IF
(
@a
=
@b
)
BEGIN
SET
@num
=
@num
+
1
END
ELSE
BEGIN
break
END
SET
@len
=
@len
+
1
END
END
SET
@num
=
@num
*
1.0
/
LEN
(
@Cloumna
)
BREAK
END
RETURN
@num
END
go
--
--測試代碼
SELECT
DBO.FN_Resemble_order(
'
ABDC456G
'
,
'
ABDC123G
'
)
?
?
3.兩個字段一對一相似
?
--
-兩個字段一對一相似
CREATE
FUNCTION
DBO.FN_Resemble_onebyone
(
@Cloumna
NVARCHAR
(
MAX
),
@Cloumnb
NVARCHAR
(
MAX
)
)
RETURNS
FLOAT
AS
BEGIN
DECLARE
@num
FLOAT
,
@len
int
SET
@Cloumna
=
ISNULL
(
@Cloumna
,
0
)
SET
@Cloumnb
=
ISNULL
(
@Cloumnb
,
0
)
SET
@len
=
1
SET
@num
=
0
WHILE
(
LEN
(
@Cloumna
)
<>
0
AND
LEN
(
@CloumnB
)
<>
0
)
BEGIN
DECLARE
@a
NVARCHAR
(
4
)
DECLARE
@b
NVARCHAR
(
4
)
IF
(
LEN
(
@Cloumna
)
>=
LEN
(
@CloumnB
))
BEGIN
WHILE
(
@len
<=
LEN
(
@CloumnB
))
BEGIN
SET
@a
=
''
SET
@a
=
SUBSTRING
(
@Cloumna
,
@len
,
1
)
SET
@b
=
''
SET
@b
=
SUBSTRING
(
@CloumnB
,
@len
,
1
)
IF
(
@a
=
@b
)
BEGIN
SET
@num
=
@num
+
1
END
SET
@len
=
@len
+
1
END
END
ELSE
IF
(
LEN
(
@Cloumna
)
<
LEN
(
@CloumnB
))
BEGIN
WHILE
(
@len
<=
LEN
(
@Cloumna
))
BEGIN
SET
@a
=
''
SET
@a
=
SUBSTRING
(
@Cloumna
,
@len
,
1
)
SET
@b
=
''
SET
@b
=
SUBSTRING
(
@CloumnB
,
@len
,
1
)
IF
(
@a
=
@b
)
BEGIN
SET
@num
=
@num
+
1
END
SET
@len
=
@len
+
1
END
END
SET
@num
=
@num
*
1.0
/
LEN
(
@Cloumna
)
BREAK
END
RETURN
@num
END
--
--測試代碼
SELECT
DBO.FN_Resemble_onebyone(
'
ABDC456G
'
,
'
ABDC123G
'
)
?
總結
?? 如果大家覺得文章對大家有幫助,麻煩給個推薦,謝謝。
?
|
備注: ??? 作者: pursuer.chen ??? 博客: http://www.cnblogs.com/chenmh 本站點所有隨筆都是原創,歡迎大家轉載;但轉載時必須注明文章來源,且在文章開頭明顯處給明鏈接,否則保留追究責任的權利。 《歡迎交流討論》 |
更多文章、技術交流、商務合作、聯系博主
微信掃碼或搜索:z360901061
微信掃一掃加我為好友
QQ號聯系: 360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然后點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元

