mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Merge remote-tracking branch 'origin/topic/timw/906-find-all-urls-regex'
* origin/topic/timw/906-find-all-urls-regex:
  Restore previous url scheme capture group
  GH-906: Fix the regex in url.zeek to better match for find_all_urls
This commit is contained in:
commit
2aeb3d8e39
5 changed files with 24 additions and 4 deletions
|
@ -1,7 +1,10 @@
|
|||
##! Functions for URL handling.
|
||||
|
||||
## A regular expression for matching and extracting URLs.
|
||||
# Capture groups, in order:
#   1. scheme: 3-5 letters or hyphens at the start of the input;
#   2. "://" plus everything up to the first '/', '?', '#', quote,
#      CR/LF or angle bracket;
#   3. the following run of characters up to the first '?', '#', quote,
#      CR/LF or angle bracket;
#   4. the remainder: either a run without blanks, CR/LF, quotes or angle
#      brackets, or an optional '?' followed by a run without quotes,
#      CR/LF or angle brackets.
# The constant is declared &redef so it can be redefined elsewhere.
const url_regex = /^([a-zA-Z\-]{3,5})(:\/\/[^\/?#"'\r\n><]*)([^?#"'\r\n><]*)([^[:blank:]\r\n"'><]*|\??[^"'\r\n><]*)/ &redef;
|
||||
## This is the @imme_emosol regex from https://mathiasbynens.be/demo/url-regex, adapted for Zeek. It's
|
||||
## not perfect for all of their test cases, but it's one of the shorter ones that covers most of the
|
||||
## test cases.
|
||||
# Pattern structure, in order:
#   1. scheme: 3-5 letters or hyphens at the start of the input (captured);
#   -  a literal "://" (not part of any capture group);
#   2. an optional literal "-." prefix;
#   3. one or more repetitions of: a run of characters that are not blank,
#      '/', '?', '.', '#' or '-', optionally followed by a single '.';
#   4. an optional path: '/' followed by any run of non-blank characters.
# NOTE(review): the character class in group 3 excludes '-', so a host
# name containing a hyphen appears to stop matching at the hyphen --
# confirm whether that is intended.
# The constant is declared &redef so it can be redefined elsewhere.
const url_regex = /^([a-zA-Z\-]{3,5}):\/\/(-\.)?([^[:blank:]\/?\.#-]+\.?)+(\/[^[:blank:]]*)?/ &redef;
|
||||
|
||||
## A URI, as parsed by :zeek:id:`decompose_uri`.
|
||||
type URI: record {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue