예시로 공부하는 javascript 정규식

솔직히 설명 백날 읽어봐야 절대 이해 안된다. 코드로 바로 직행을 추천.

프코캠에서 regex 알려주길래 밤에 커피 마시면서 함 돌려봤다. 설명도 안 쓸테니 코드를 음미하자.

let testStr = "Hello, my name is Kevin.";
let testRegex = /Kevin/;
testRegex.test(testStr); // true

let petString = "James has a pet cat.";
let petRegex = /dog|cat|bird|fish/; // | 는 pipe char
let result = petRegex.test(petString);

let myString = "freeCodeCamp";
let fccRegex = /freeCodeCamp/i; // i를 붙여서 case insensitive로
let result = fccRegex.test(myString);

const myStr = 'hello my name is DarRen';
const nameRgx = /darren/i; // case insensitive

console.log(myStr.match(nameRgx)); //[ 'DarRen', index: 17, ... ]
console.log(nameRgx.test(myStr)); // true

let twinkleStar = 'Twinkle, twinkle, little star';
let starRegex = /Twinkle/ig; // g 붙여서 같은 거 있으면 여러번
let result = twinkleStar.match(starRegex); // [ 'Twinkle', 'twinkle' ]

const str = '사서 사니 사고 사는가';
const rgx = /사./g; // .은 wildcard임. 어떤 문자와도 매칭됨
console.log(str.match(rgx)); // [ '사서', '사니', '사고', '사는' ]

const str = 'big bug bog bag';
const rgx = /b[aieou]g/g; // [] 내부에 든 것중 하나만 만족하면 됨. bag, big, beg, bog, bug 해당
console.log(str.match(rgx)); // [ 'big', 'bug', 'bog', 'bag' ]

let quoteSample = "The quick brown fox jumps over the lazy dog.";
let alphabetRegex = /[A-Za-z]/ig; // - 가 붙으면 범위를 의미. 영어 대소문자면 다 됨. 
let result = quoteSample.match(alphabetRegex); // ['T', 'h', 'e', 'q', 'u', ... ]

// 아래 두 정규식은 같은 의미임
/[A-Za-z]/
/[a-z]/i

let quoteSample = "Blueberry 3.141592653s are delicious.";
let myRegex = /[h-s2-6]/ig; // 숫자도 가능
let result = quoteSample.match(myRegex);

let quoteSample = '3 blind! mice.';
let myRegex = /[^aeiou0-9]/gi; // [] 안에 든 ^는 반대를 의미. 즉, 모음, 숫자 아닌 모든 것. 공백, ., !, [, @, / 같은 특수 문자도 매칭됨
let result = quoteSample.match(myRegex);

let difficultSpelling = 'Mississippis';
let myRegex = /s+/g; // s가 1개 이상 있는 것.
let result = difficultSpelling.match(myRegex); // [ 'ss', 'ss', 's' ]

let chewieQuote = 'Aaaaaaaarrgh!';
let chewieRegex = /Aa*/g; //  *는 0개 이상. 즉, A만 있어도 되긴함.
let result = chewieQuote.match(chewieRegex); // [ 'Aaaaaaaa' ]

let text = 'titanic';
let myRegex = /t[a-z]*i/; // greedy matching. 만족하는 것 중 가장 긴 것
let lazyRegex = /t[a-z]*?i/; // lazy matching. 만족하는 것 중 가장 짧은 것
let result = text.match(myRegex); // titani
let result2 = text.match(lazyRegex); // ti

let text = '<h1>Winter is coming</h1>';
let myRegex = /<.*?>/; // 어떤 문자가 0개이상 있되 lazy matching
let result = text.match(myRegex); // <h1>

// Match Beginning String Patterns
// 주의할 점이 [] 안에 든 ^는 반대를 의미. 여기서 ^X는 X로 패턴이 시작함을 의미

let firstString = "Ricky is first and can be found.";
let firstRegex = /^Ricky/;
firstRegex.test(firstString); // true
let notFirst = "You can't find Ricky now.";
firstRegex.test(notFirst); // false

// Match Ending String Patterns

let theEnding = "This is a never ending story";
let storyRegex = /story$/;
storyRegex.test(theEnding); // true
let noEnding = "Sometimes a story will have to end";
storyRegex.test(noEnding); // false

let longHand = /[A-Za-z0-9_]+/; // 영어 알파벳, 숫자, _와 매칭. 단 공백 제외
let shortHand = /\w+/; // [a-zA-Z0-9_] 단 공백 제외

let numbers = "42";
let varNames = "important_var";
longHand.test(numbers); // true
shortHand.test(numbers); // true
longHand.test(varNames); // true
shortHand.test(varNames); // true

let quoteSample = 'The five boxing wizards jump quickly.';

let alphabetRegexV2 = /\w/g; // [a-zA-Z0-9_]
let alphabetRegexV3 = /\w+/g; // [a-zA-Z0-9_]가 1개 이상 존재하는 덩어리

let resultV2 = quoteSample.match(alphabetRegexV2).length; // 31 (문자 갯수)
let resultV3 = quoteSample.match(alphabetRegexV3).length; // 6 (문장 덩어리 갯수)

let quoteSample = 'This! is, special% char& 하 34';
let nonAlphabetRegex = /\W/g; // \W는 \w와 반대 즉, [^A-Za-z0-9_]입니다. 특수 문자와 공백, 한글을 찾아냅니다.
let result = quoteSample.match(nonAlphabetRegex); // ['!', ' ', ',', ' ', '%', ' ', '&', ' ', '하', ' ']

let movieName = '2001: A Space Odyssey';
let numRegex = /\d/g; // \d 는 [0-9]을 의미한다. digit
let result = movieName.match(numRegex); // [ '2', '0', '0', '1' ]

let movieName = '2001: A Space Odyssey';

let numRegex = /\d/g; // \d 는 [0-9]을 의미한다. digit
let notNumRegex = /\D/g; // \D 는 [^0-9]을 의미한다. non-digit
let result = movieName.match(numRegex); // [ '2', '0', '0', '1' ]
let result2 = movieName.match(notNumRegex); // [ ':', ' ', 'A', ' ', ... ]

let account = 'USD $12.000,00';
let regex = /\$[0-9,.]+/;

console.log(account.match(regex)); // $12.000,00

// 유효한 userName인지 정규식으로 걸러내기
// spec :
// 1. Usernames can only use alpha-numeric characters.
// 2. The only numbers in the username have to be at the end. There can be zero or more of them at the end. Username cannot start with the number.
// 3. Username letters can be lowercase and uppercase.
// 4. Usernames have to be at least two characters long. A two-character username can only use alphabet letters as characters.

let username = 'JackOfAllTrades';
let userCheck = /(^[a-z][a-z]+\d*$|^[a-z]\d\d+$)/i;
// same as /^[a-z]([0-9]{2,}|[a-z]+\d*)$/i;
let result = userCheck.test(username); // 유효한 username이니? T/F

let whiteSpace = 'Whitespace. Whitespace everywhere!';
let spaceRegex = /\s/g;
let result = whiteSpace.match(spaceRegex); // [ ' ', ' ' ]

let A5 = 'aaaaah';
let A4 = 'aaaah';
let A3 = 'aaah';
let A2 = 'aah';
let multipleA = /a{3,5}h/; // a가 3개 이상 5개 이하 있어야 함

console.log(multipleA.test(A5)); // true
console.log(multipleA.test(A4)); // true
console.log(multipleA.test(A3)); // true
console.log(multipleA.test(A2)); // false

let ohStr = 'Ohhh no';
let ohRegex = /Oh{3,6}\sno/; // Ohhh no, Ohhhh no, ...
let result = ohRegex.test(ohStr);

let AMany = 'haaaaaaaaaaaaaaaaaah';
let A4 = 'haaaah';
let A2 = 'haah';
let multipleA = /ha{3,}h/; // a가 3개 이상이기만 하면 됨.

console.log(multipleA.test(AMany)); // true
console.log(multipleA.test(A4)); // true
console.log(multipleA.test(A2)); // false

let A3 = 'haaah';
let A2 = 'haah';
let multipleA = /ha{3}h/; // a가 딱 3개 있어야 함

console.log(multipleA.test(A3)); // true
console.log(multipleA.test(A2)); // false

let favWord = 'favorite';
let favRegex = /favou?rite/; // u는 없어도 좋고 있어도 (1개) 좋다. 단 u 여러개는 안된다. 그럴거면 *을 써라.
let result = favRegex.test(favWord); // true

let url = 'https://darrengown.tistory.com';
let schemeRegex = /.+(?=:)/; // 긍정 전방탐색. :에 해당하는 곳의 앞을 반환하라. 단, : 제외
const result = url.match(schemeRegex); // https

let cssText = 'color: #ff0000; background: #ddd;';
let regex = /#[0-9A-Fa-f]+(?=;)/g; //cssText에서 color hex 뽑기. 전방탐색 활용

console.log(cssText.match(regex)); // [ '#ff0000', '#ddd' ]

let tsReact = 'app.tsx';
let regex = /\.(js|ts|jsx|tsx)$/; // capturing group
let wrongRgx = /\.js|ts|jsx|tsx$/; 

console.log(tsReact.match(regex)); // .tsx
console.log(tsReact.match(wrongRgx)); // ts. 캡쳐링 안해줘서 앞에 있는 ts에 걸림

let hello = '   Hello, World!  ';
let wsRegex = /^\s+|\s+$/g; // 앞 뒤 공백 추출
let result = hello.replace(wsRegex, ''); // trim!

설명들

basic

[ ] : 대괄호 안에 문자들과 매칭. [abc] 면 a || b || c

- : 범위. [A-Za-z0-9가-힣]는 알파벳 대소, 숫자, 한글 모든 조합을 포함함

\w, \W: word. a-z, A-Z, 숫자, _인 것. 한글, 한자는 인식 못 함

\w = 영어 알파벳, 숫자, _ 와 매칭 [a-zA-Z0-9_]

\W = [^a-zA-Z0-9_]

\d = [0-9]

\D = [^0-9]

\s = 공백. 띄어쓰기, 탭, 엔터 다 공백임. 대략 [ \t\n\r\f\v] (맨 앞의 띄어쓰기 문자 포함)

\S = 공백 아닌 모든 것. 대략 [^ \t\n\r\f\v]

제한자(quantifier)

+ ONE or more

* ZERO or more (즉, 없어도 됨)

? optional. 없거나 1개여야 함. (*과 혼동 주의 *는 여러개 있어도 됨.)

^ 무엇으로 시작함. (\^로 단순 문자 취급 가능)

$ 무엇으로 끝남. (\$ 로 단순 문자 취급 가능)

[^ 반대 (^와 혼동 주의) ex - [^a-d] 는 a, b, c, d가 아닌 모든 것을 말함

{m,n} m개 이상 n개 이하 반복. (쉼표 뒤에 공백을 넣으면 수량자로 인식되지 않고 일반 문자로 취급되니 주의)

{m,} m개 이상이기만 하면 됨.

{m} 정확히 m개 여야 함

Lookaround : Lookahead (전방 탐색) Lookbehind (후방 탐색)

앞, 뒤에 특정 조건을 만족하는 녀석을 탐색하기 위한 겁니다. 말로 해봐야 이해를 못하니 실습 ㄱ

긍정형 전방 (positive lookahead) (?=...)과 부정형 전방 (negative lookahead) (?!...)

const str = "4 dogs wandering in 3AM";

const lookaheadRe = /[0-9]+(?=AM)/; // 긍정 전방탐색. 뒤에 AM이 붙되 숫자를 가져와라
const negativelookaheadRe = /[0-9]+(?!AM)/; // 부정 전방탐색. 뒤에 AM이 붙지 않되 숫자를 가져와라

str.match(lookaheadRe); // 3를 반환
str.match(negativelookaheadRe); // 4를 반환

긍정형 후방(positive lookbehind) (?<=...) 과 부정형 후방(negative lookbehind) (?<!...)

const str = "tesla moedel 152's price is $4999"
const positiveLookbehindRe = /(?<=\$)[0-9]+/
const negativeLookbehindRe = /(?<!\$)[0-9]+/

str.match(positiveLookbehindRe) // 4999
str.match(negativeLookbehindRe) // 152

flag

i => insensitive

g => global

m => multi line. ^와 $가 문자열 전체가 아니라 각 줄의 시작과 끝에 매칭되도록 함. 탐색하고자 하는 텍스트가 여러 줄일 경우에 사용

lazy matching

기본적으로 regex는 greedy matching이다. lazy matching을 통해 패턴에 매칭되는 것들 중 가장 짧은 녀석을 고를 수 있게 된다.

const str = "bar baar baaaar baaaaaaaar"
const greedyRe = /.*r/
const lazyRe = /.*?r/
str.match(greedyRe) // bar baar baaaar baaaaaaaar
str.match(lazyRe) // bar

?를 붙이면 아래처럼 활용할 수 있다.

+? 1개 이상 있긴 한데 만족하는 것 중 가장 짧은 것

*? 0개 이상 있긴 한데 만족하는 것 중 가장 짧은 것

?? 0개 또는 1개인데 가능하면 0개 쪽을 먼저 시도함 (lazy optional. 쓸 일은 드물다)

{m,n}? m개 이상 n개 이하 있긴 한데 가장 짧은 것

{m,}? m개 이상 있긴한데 가장 짧은것

💻 실제 프로젝트에서 사용하는 Regex들

* webpack에서 특정 확장자로 끝나는 경우 추출하기

\.는 .이 특별한 의미가 아닌 일반 문자인 .을 의미

css$는 css로 끝나는 것을 의미

()는 Capturing group으로 /\.(js|mjs|jsx|ts|tsx)$/ 와 같이 여러 문자열 중 하나에 matching 시켜야 하는 경우 사용해야 합니다.

만약, /\.js|mjs|jsx|ts|tsx$/ 와 같이 사용한 후 hello.tsx을 검사해보면 .tsx에 걸리는 것이 아니라 ts에 걸립니다. (그룹이 없으면 | 가 \.js, mjs, jsx, ts, tsx$ 를 각각 별개의 패턴으로 나누기 때문)

/\.(scss)$/

/\.(js)$/

/\.css$/

/\.(ts|tsx)$/

/\.(js|mjs|jsx|ts|tsx)$/

/\.html$/

/\.json$/

* e2e 테스트 파일을 감지하기 위한 정규식

".e2e-spec.ts$"

* gmail.com으로 끝나는가?

/^[A-Za-z0-9._%+-]+@gmail\.com$/

* 글에서 img 태그 분리

/<img[^>]*src=[\"']?([^>\"']+)[\"']?[^>]*>/

* cssText에서 color hex 분리

/#[0-9A-Fa-f]+(?=;)/g;

const cssText = 'color: #ff0000; background: #ddd;';
const regex = /#[0-9A-Fa-f]+(?=;)/g; //cssText에서 color hex 뽑기

cssText.match(regex); // [ '#ff0000', '#ddd' ]

* img 태그에서 src="" 에서 "" 부분 추출

/"(.+?)"/;

그 외 여러 정규식 조각들

[a-zA-Z] a부터 z까지, A부터 Z까지. 즉, 영어 알파벳과 일치

[가-힣] 조합된 한글 모두와 매칭. 단, 낱개의 자음, 모음은 일치 하지 않음

\.{2,} '.'이 두 번 이상 반복될 경우 해당 문자와 매칭(.은 메타 문자이기 때문에 \를 붙여야 함)

[^a-zA-Z가-힣] 영어와 (조합된) 한글이 아닌 문자와 매칭. 즉, 영어도 아니고 한글도 아닌것.

(다|까)+(\.|\?)$ 문장의 끝이 다. 다? 까. 까? 로 끝나는 것

다\. ? 문자열 중 '다.' 혹은 '다. '가 있을 경우.

[가-힣][은는이가]+ 문자열 중 한글 + [은는이가] 인 경우 매칭

* [0-9a-zA-Z_]+[\.]+(jpg|png|svg|gif)

* http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*,]|(?:%[0-9a-fA-F][0-9a-fA-F]))+

url에서 scheme 발라내기

let url = 'https://darrengown.tistory.com';
let schemeRegex = /.+(?=:)/; // 긍정 전방탐색. :에 해당하는 곳의 앞을 반환하라. 단, : 제외
const result = url.match(schemeRegex); // https

한국형으로 전화번호를 정규식으로 잡아보자면 다음과 같을 것이다.

^(0|1){3}\-[0-9]{4}\-[0-9]{4}$

ref)

정규식 검증 및 활용에 유용한 사이트들

https://regex101.com/

https://ihateregex.io/

https://regexr.com/

참고한 블로그들

https://stackoverflow.com/questions/3512471/what-is-a-non-capturing-group-in-regular-expressions

https://blog.rhostem.com/posts/2018-11-11-regex-capture-group

https://www.slideshare.net/ibare/ss-39274621

https://stackoverflow.com/questions/19605150/regex-for-password-must-contain-at-least-eight-characters-at-least-one-number-a

https://darrengwon.tistory.com/1271?category=900894

https://evan-moon.github.io/2020/08/15/regex-example/

https://evan-moon.github.io/2020/07/24/about-regular-expression/

저작자표시 (새창열림)

'Programming Language > 🟨 Javascript' 카테고리의 다른 글

ArrayBuffer, Typed Arrays, DataView : raw binary data를 핸들링해보자 (0)	2021.11.15
DOM 구조화 : Range, Parsing, AST 그리고 에디터 (0)	2021.11.01
js의 Number, BigInt 타입과 정밀한 숫자 계산에 대하여 (0)	2021.04.20
throttle과 debounce를 통해 중복된 요청을 줄여보자 (0)	2021.03.16
Date 타입과 ISO, UNIX (0)	2021.02.17

darren, dev blog