Sqlserver
 sql >> Cơ Sở Dữ Liệu >  >> RDS >> Sqlserver

Thuật toán T-SQL để mã hóa các ký tự HTML không an toàn dưới dạng tham chiếu thực thể ký tự HTML

Mã hóa năm ký tự đặc biệt bằng CTE đệ quy:

DECLARE 
  @unsafe NVARCHAR(MAX),
  @safe   NVARCHAR(MAX) 

--
-- Create the unsafe html string
-- 
SET @unsafe = N'html''s encoding "method" is <= or >= & 1234 ' + NCHAR(129) 
--
-- Use a recursive CTE to iterate through each character in the string
-- 
;WITH cte AS 
(
  -- 
  -- The first row will contain the original
  -- string, an empty string to be used to 
  -- build the "safe" string, and a position
  -- column to mark the character position
  -- of the loop
  -- 
  SELECT 
    @unsafe AS unsafe_html,
    CONVERT(NVARCHAR(MAX), '') AS safe_html,
    1 AS pos
  WHERE @unsafe IS NOT NULL AND LEN(@unsafe) > 0 
  UNION ALL
  -- 
  -- Create a loop: 
  -- The anchor row starts at position one.
  -- Increment the position by one for each pass.
  -- Stop when the position value is equal to the string lenth.
  -- Evaluate the character in each string
  -- If the ASCII value > 128, use the &# format.
  -- Otherwise, check for 5 special characters: " & ' < >
  -- Use the encoding reference or just the original character
  --
  SELECT 
    @unsafe AS unsafe_html,
    CONVERT(NVARCHAR(MAX), safe_html + 
    CASE WHEN UNICODE(SUBSTRING(unsafe_html, pos, 1)) > 128 
         THEN '&#' + CONVERT(NVARCHAR(10), UNICODE(SUBSTRING(unsafe_html, pos, 1)))  
         ELSE CASE SUBSTRING(unsafe_html, pos, 1)
              WHEN '"'  THEN '&quot' 
              WHEN '&'  THEN '&amp'
              WHEN '''' THEN '&apos'
              WHEN '<'  THEN '&lt'
              WHEN '>'  THEN '&gt'
              ELSE SUBSTRING(unsafe_html, pos, 1)
              END 
         END ) AS safe_html,
    pos + 1 AS pos
  FROM cte
  WHERE pos <= LEN(@unsafe)
) 
--
-- Each pass through the string creates a row in the CTE
-- The last row will have the position value of the string length + 1
-- Use that row as the safe html string
-- SQL Server allows a max recursion of 32767
-- 
SELECT @safe = (
  SELECT safe_html 
  FROM cte
  WHERE pos = LEN(@unsafe) + 1
) 
OPTION (MAXRECURSION 32767) 

SELECT @safe

-- html&aposs encoding &quotmethod&quot is &lt= or &gt= &amp 1234 &#129

Phiên bản ban đầu:

DECLARE @s NVARCHAR(100)

SET @s = '<html>unsafe & safe<html>'
SELECT @s 
SELECT (SELECT @s FOR XML PATH(''))

---------------------------------------
<html>unsafe & safe<html>

-----------------------------------------
&lt;html&gt;unsafe &amp; safe&lt;html&gt;

Mã hóa đầy đủ với tất cả các tham chiếu chính thức:

DECLARE 
    @unsafe  NVARCHAR(MAX),
    @safe NVARCHAR(MAX) 

-- Build string with first 10,000 unicode chars
SELECT @unsafe = COALESCE(@unsafe, '') + NCHAR(number) + ' ' 
FROM (
    SELECT TOP 10000 ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS number
    FROM sys.all_objects s1 CROSS JOIN sys.all_objects s2 
) t

-- Build table variable with character entity references defined in HTML 4.0
-- Reference: http://www.htmlcodetutorial.com/characterentities_famsupp_69.html
DECLARE @t TABLE (
    name NVARCHAR(25) NOT NULL, 
    unicode_val INT NOT NULL PRIMARY KEY 
) 

INSERT @t 
VALUES
('&quot', 34),
('&amp', 38),
('&apos', 39),
('&lt', 60),
('&gt', 62),
('&nbsp', 160),
('&iexcl', 161),
('&cent', 162),
('&pound', 163),
('&curren', 164),
('&yen', 165),
('&brvbar', 166),
('&sect', 167),
('&uml', 168),
('&copy', 169),
('&ordf', 170),
('&laquo', 171),
('&not', 172),
('&shy', 173),
('&reg', 174),
('&macr', 175),
('&deg', 176),
('&plusmn', 177),
('&sup2', 178),
('&sup3', 179),
('&acute', 180),
('&micro', 181),
('&para', 182),
('&middot', 183),
('&cedil', 184),
('&sup1', 185),
('&ordm', 186),
('&raquo', 187),
('&frac14', 188),
('&frac12', 189),
('&frac34', 190),
('&iquest', 191),
('&Agrave', 192),
('&Aacute', 193),
('&Acirc', 194),
('&Atilde', 195),
('&Auml', 196),
('&Aring', 197),
('&AElig', 198),
('&Ccedil', 199),
('&Egrave', 200),
('&Eacute', 201),
('&Ecirc', 202),
('&Euml', 203),
('&Igrave', 204),
('&Iacute', 205),
('&Icirc', 206),
('&Iuml', 207),
('&ETH', 208),
('&Ntilde', 209),
('&Ograve', 210),
('&Oacute', 211),
('&Ocirc', 212),
('&Otilde', 213),
('&Ouml', 214),
('&times', 215),
('&Oslash', 216),
('&Ugrave', 217),
('&Uacute', 218),
('&Ucirc', 219),
('&Uuml', 220),
('&Yacute', 221),
('&THORN', 222),
('&szlig', 223),
('&agrave', 224),
('&aacute', 225),
('&acirc', 226),
('&atilde', 227),
('&auml', 228),
('&aring', 229),
('&aelig', 230),
('&ccedil', 231),
('&egrave', 232),
('&eacute', 233),
('&ecirc', 234),
('&euml', 235),
('&igrave', 236),
('&iacute', 237),
('&icirc', 238),
('&iuml', 239),
('&eth', 240),
('&ntilde', 241),
('&ograve', 242),
('&oacute', 243),
('&ocirc', 244),
('&otilde', 245),
('&ouml', 246),
('&divide', 247),
('&oslash', 248),
('&ugrave', 249),
('&uacute', 250),
('&ucirc', 251),
('&uuml', 252),
('&yacute', 253),
('&thorn', 254),
('&yuml', 255),
('&OElig', 338),
('&oelig', 339),
('&Scaron', 352),
('&scaron', 353),
('&Yuml', 376),
('&fnof', 402),
('&circ', 710),
('&tilde', 732),
('&Alpha', 913),
('&Beta', 914),
('&Gamma', 915),
('&Delta', 916),
('&Epsilon', 917),
('&Zeta', 918),
('&Eta', 919),
('&Theta', 920),
('&Iota', 921),
('&Kappa', 922),
('&Lambda', 923),
('&Mu', 924),
('&Nu', 925),
('&Xi', 926),
('&Omicron', 927),
('&Pi', 928),
('&Rho', 929),
('&Sigma', 931),
('&Tau', 932),
('&Upsilon', 933),
('&Phi', 934),
('&Chi', 935),
('&Psi', 936),
('&Omega', 937),
('&alpha', 945),
('&beta', 946),
('&gamma', 947),
('&delta', 948),
('&epsilon', 949),
('&zeta', 950),
('&eta', 951),
('&theta', 952),
('&iota', 953),
('&kappa', 954),
('&lambda', 955),
('&mu', 956),
('&nu', 957),
('&xi', 958),
('&omicron', 959),
('&pi', 960),
('&rho', 961),
('&sigmaf', 962),
('&sigma', 963),
('&tau', 964),
('&upsilon', 965),
('&phi', 966),
('&chi', 967),
('&psi', 968),
('&omega', 969),
('&thetasym', 977),
('&upsih', 978),
('&piv', 982),
('&ensp', 8194),
('&emsp', 8195),
('&thinsp', 8201),
('&zwnj', 8204),
('&zwj', 8205),
('&lrm', 8206),
('&rlm', 8207),
('&ndash', 8211),
('&mdash', 8212),
('&lsquo', 8216),
('&rsquo', 8217),
('&sbquo', 8218),
('&ldquo', 8220),
('&rdquo', 8221),
('&bdquo', 8222),
('&dagger', 8224),
('&Dagger', 8225),
('&bull', 8226),
('&hellip', 8230),
('&permil', 8240),
('&prime', 8242),
('&Prime', 8243),
('&lsaquo', 8249),
('&rsaquo', 8250),
('&oline', 8254),
('&frasl', 8260),
('&euro', 8364),
('&image', 8465),
('&weierp', 8472),
('&real', 8476),
('&trade', 8482),
('&alefsym', 8501),
('&larr', 8592),
('&uarr', 8593),
('&rarr', 8594),
('&darr', 8595),
('&harr', 8596),
('&crarr', 8629),
('&lArr', 8656),
('&uArr', 8657),
('&rArr', 8658),
('&dArr', 8659),
('&hArr', 8660),
('&forall', 8704),
('&part', 8706),
('&exist', 8707),
('&empty', 8709),
('&nabla', 8711),
('&isin', 8712),
('&notin', 8713),
('&ni', 8715),
('&prod', 8719),
('&sum', 8721),
('&minus', 8722),
('&lowast', 8727),
('&radic', 8730),
('&prop', 8733),
('&infin', 8734),
('&ang', 8736),
('&and', 8743),
('&or', 8744),
('&cap', 8745),
('&cup', 8746),
('&int', 8747),
('&there4', 8756),
('&sim', 8764),
('&cong', 8773),
('&asymp', 8776),
('&ne', 8800),
('&equiv', 8801),
('&le', 8804),
('&ge', 8805),
('&sub', 8834),
('&sup', 8835),
('&nsub', 8836),
('&sube', 8838),
('&supe', 8839),
('&oplus', 8853),
('&otimes', 8855),
('&perp', 8869),
('&sdot', 8901),
('&lceil', 8968),
('&rceil', 8969),
('&lfloor', 8970),
('&rfloor', 8971),
('&lang', 9001),
('&rang', 9002),
('&loz', 9674),
('&spades', 9824),
('&clubs', 9827),
('&hearts', 9829),
('&diams', 9830)

-- Build numbers table to parse the string
DECLARE @numbers TABLE (number INT NOT NULL PRIMARY KEY) 
INSERT @numbers
SELECT TOP (LEN(@unsafe)) ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS number
FROM sys.all_objects s1 CROSS JOIN sys.all_objects s2

-- Use numbers table to parse each character.
-- If a match is found in character entity reference table,
-- then use the safe substitute. Otherwise, if the unicode
-- value is greater than 128, use &#<unicode char value>.
-- Finally, use the original character if nothing else
-- is a match
SELECT @safe = COALESCE(@safe,'') 
       + COALESCE(name, 
         CASE WHEN UNICODE(SUBSTRING(@unsafe, number, 1)) > 128 THEN '&#' 
          + CONVERT(NVARCHAR(10), UNICODE(SUBSTRING(@unsafe, number, 1))) 
          ELSE SUBSTRING(@unsafe, number, 1) END)
FROM @numbers 
LEFT OUTER JOIN @t  
  ON UNICODE(SUBSTRING(@unsafe, number, 1)) = unicode_val

SELECT @safe AS [safe]

Results:  
! &quot # $ % &amp &apos ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; 
&lt = &gt ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 
[ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { 
| } ~   &#129 &#130 &#131 &#132 &#133 &#134 &#135 &#136 &#137 &#138 
&#139 &#140 &#141 &#142 &#143 &#144 &#145 &#146 &#147 &#148 &#149 
&#150 &#151 &#152 &#153 &#154 &#155 &#156 &#157 &#158 &#159 &nbsp 
&iexcl &cent &pound &curren &yen &brvbar &sect &uml &copy &ordf 
&laquo &not &shy &reg &macr &deg &plusmn &sup2 &sup3 &acute &micro 
&para &middot &cedil &sup1 &ordm &raquo &frac14 &frac12 &frac34 
&iquest &Agrave &Aacute &Acirc &Atilde &Auml &Aring &AElig &Ccedil 
&Egrave &Eacute &Ecirc &Euml &Igrave &Iacute &Icirc &Iuml &ETH &Ntilde 
&Ograve &Oacute &Ocirc &Otilde &Ouml &times &Oslash &Ugrave &Uacute 
&Ucirc &Uuml &Yacute &THORN &szlig &agrave &aacute &acirc &atilde &auml 
&aring &aelig &ccedil &egrave &eacute &ecirc &euml &igrave &iacute &icirc 
&iuml &eth &ntilde &ograve &oacute &ocirc &otilde &ouml &divide &oslash 
&ugrave &uacute &ucirc &uuml &yacute &thorn &yuml &#256 &#257 &#258 &#259 
&#260 &#261 &#262 &#263 &#264 &#265 &#266...


  1. Database
  2.   
  3. Mysql
  4.   
  5. Oracle
  6.   
  7. Sqlserver
  8.   
  9. PostgreSQL
  10.   
  11. Access
  12.   
  13. SQLite
  14.   
  15. MariaDB
  1. SQL Server - dừng hoặc ngắt việc thực thi tập lệnh SQL

  2. Cách ánh xạ một trường thực thể có tên là một từ dành riêng trong JPA

  3. Sử dụng Dapper với kiểu không gian SQL làm tham số

  4. Truyền nhiều giá trị cho một tham số của một hàm trong SQL

  5. Loại dữ liệu nào nên được sử dụng để lưu trữ số điện thoại trong SQL Server 2005?