package html
import "github.com/tdewolff/parse/html"
Package html is an HTML5 lexer following the specifications at http://www.w3.org/TR/html5/syntax.html.
Index ¶
- func EscapeAttrVal(buf *[]byte, orig, b []byte) []byte
- type Hash
- type Lexer
- func NewLexer(r io.Reader) *Lexer
- func (l *Lexer) AttrVal() []byte
- func (l *Lexer) Err() error
- func (l *Lexer) Free(n int)
- func (l *Lexer) Next() (TokenType, []byte)
- func (l *Lexer) Text() []byte
- type TokenType
Examples ¶
Functions ¶
func EscapeAttrVal ¶
EscapeAttrVal returns the escaped attribute value bytes without quotes.
Types ¶
type Hash ¶
type Hash uint32
Hash defines perfect hashes for a predefined list of strings.
const ( A Hash = 0x1 // a Abbr Hash = 0x4 // abbr Accept Hash = 0x3206 // accept Accept_Charset Hash = 0x320e // accept-charset Accesskey Hash = 0x4409 // accesskey Acronym Hash = 0xbb07 // acronym Action Hash = 0x2b906 // action Address Hash = 0x67607 // address Align Hash = 0x1605 // align Alink Hash = 0xd205 // alink Allowfullscreen Hash = 0x23c0f // allowfullscreen Alt Hash = 0xeb03 // alt Annotation Hash = 0x2060a // annotation AnnotationXml Hash = 0x2060d // annotationXml Applet Hash = 0x16106 // applet Area Hash = 0x38604 // area Article Hash = 0x40707 // article Aside Hash = 0x8305 // aside Async Hash = 0xf705 // async Audio Hash = 0x11305 // audio Autocomplete Hash = 0x14a0c // autocomplete Autofocus Hash = 0x15609 // autofocus Autoplay Hash = 0x16b08 // autoplay Axis Hash = 0x17304 // axis B Hash = 0x101 // b Background Hash = 0x1e0a // background Base Hash = 0x44d04 // base Basefont Hash = 0x44d08 // basefont Bdi Hash = 0xcb03 // bdi Bdo Hash = 0x18a03 // bdo Bgcolor Hash = 0x19d07 // bgcolor Bgsound Hash = 0x1a407 // bgsound Big Hash = 0x1ac03 // big Blink Hash = 0x1af05 // blink Blockquote Hash = 0x1b40a // blockquote Body Hash = 0x4004 // body Border Hash = 0x33806 // border Br Hash = 0x202 // br Button Hash = 0x1be06 // button Canvas Hash = 0x7f06 // canvas Caption Hash = 0x27e07 // caption Center Hash = 0x62306 // center Challenge Hash = 0x1eb09 // challenge Charset Hash = 0x3907 // charset Checked Hash = 0x3ad07 // checked Cite Hash = 0xfb04 // cite Class Hash = 0x1c905 // class Classid Hash = 0x1c907 // classid Clear Hash = 0x40b05 // clear Code Hash = 0x1dc04 // code Codebase Hash = 0x44908 // codebase Codetype Hash = 0x1dc08 // codetype Col Hash = 0x19f03 // col Colgroup Hash = 0x1f408 // colgroup Color Hash = 0x19f05 // color Cols Hash = 0x20104 // cols Colspan Hash = 0x20107 // colspan Command Hash = 0x21307 // command Compact Hash = 0x21a07 // compact Content Hash = 0x58107 // content Contenteditable Hash = 0x5810f // contenteditable Hash = 
0x3b60b // contextmenu Controls Hash = 0x22908 // controls Coords Hash = 0x23506 // coords Crossorigin Hash = 0x25a0b // crossorigin Data Hash = 0x4a604 // data Datalist Hash = 0x4a608 // datalist Datetime Hash = 0x2e908 // datetime Dd Hash = 0x31602 // dd Declare Hash = 0x8607 // declare Default Hash = 0x5407 // default DefaultChecked Hash = 0x4ea0e // defaultChecked DefaultMuted Hash = 0x54b0c // defaultMuted DefaultSelected Hash = 0x540f // defaultSelected Defer Hash = 0x6205 // defer Del Hash = 0x7203 // del Desc Hash = 0x7c04 // desc Details Hash = 0x9207 // details Dfn Hash = 0xab03 // dfn Dialog Hash = 0xcc06 // dialog Dir Hash = 0xd903 // dir Dirname Hash = 0xd907 // dirname Disabled Hash = 0x10108 // disabled Div Hash = 0x10803 // div Dl Hash = 0x1aa02 // dl Download Hash = 0x47f08 // download Draggable Hash = 0x1cf09 // draggable Dropzone Hash = 0x41208 // dropzone Dt Hash = 0x5ff02 // dt Em Hash = 0x6e02 // em Embed Hash = 0x6e05 // embed Enabled Hash = 0x4e07 // enabled Enctype Hash = 0x2ce07 // enctype Face Hash = 0x62104 // face Fieldset Hash = 0x26b08 // fieldset Figcaption Hash = 0x27b0a // figcaption Figure Hash = 0x28f06 // figure Font Hash = 0x45104 // font Hash = 0xee06 // footer For Hash = 0x29b03 // for ForeignObject Hash = 0x29b0d // foreignObject Foreignobject Hash = 0x2a80d // foreignobject Form Hash = 0x2b504 // form Formaction Hash = 0x2b50a // formaction Formenctype Hash = 0x2ca0b // formenctype Formmethod Hash = 0x2d50a // formmethod Formnovalidate Hash = 0x2df0e // formnovalidate Formtarget Hash = 0x2f40a // formtarget Frame Hash = 0xa305 // frame Frameborder Hash = 0x3330b // frameborder Frameset Hash = 0xa308 // frameset H1 Hash = 0x19b02 // h1 H2 Hash = 0x32402 // h2 H3 Hash = 0x34902 // h3 H4 Hash = 0x37f02 // h4 H5 Hash = 0x60102 // h5 H6 Hash = 0x2fe02 // h6 Head Hash = 0x36b04 // head Header Hash = 0x36b06 // header Headers Hash = 0x36b07 // headers Height Hash = 0x30006 // height Hgroup Hash = 0x30806 // hgroup Hidden Hash = 
0x31406 // hidden High Hash = 0x32104 // high Hr Hash = 0xaf02 // hr Href Hash = 0xaf04 // href Hreflang Hash = 0xaf08 // hreflang Html Hash = 0x30404 // html Http_Equiv Hash = 0x3260a // http-equiv I Hash = 0x601 // i Icon Hash = 0x58004 // icon Id Hash = 0x8502 // id Iframe Hash = 0x33206 // iframe Image Hash = 0x33e05 // image Img Hash = 0x34303 // img Inert Hash = 0x53605 // inert Input Hash = 0x46c05 // input Ins Hash = 0x26303 // ins Isindex Hash = 0x17507 // isindex Ismap Hash = 0x34b05 // ismap Itemid Hash = 0xfc06 // itemid Itemprop Hash = 0x56e08 // itemprop Itemref Hash = 0x61b07 // itemref Itemscope Hash = 0x35609 // itemscope Itemtype Hash = 0x36008 // itemtype Kbd Hash = 0xca03 // kbd Keygen Hash = 0x4a06 // keygen Keytype Hash = 0x5b007 // keytype Kind Hash = 0xd604 // kind Label Hash = 0x7405 // label Lang Hash = 0xb304 // lang Language Hash = 0xb308 // language Legend Hash = 0x1d606 // legend Li Hash = 0x1702 // li Link Hash = 0xd304 // link List Hash = 0x4aa04 // list Listing Hash = 0x4aa07 // listing Longdesc Hash = 0x7808 // longdesc Loop Hash = 0x11e04 // loop Low Hash = 0x23e03 // low Main Hash = 0x1004 // main Malignmark Hash = 0xc10a // malignmark Manifest Hash = 0x65708 // manifest Map Hash = 0x16003 // map Mark Hash = 0xc704 // mark Marquee Hash = 0x69907 // marquee Math Hash = 0x36804 // math Max Hash = 0x37703 // max Maxlength Hash = 0x37709 // maxlength Media Hash = 0xde05 // media Mediagroup Hash = 0xde0a // mediagroup Menu Hash = 0x3bd04 // menu Meta Hash = 0x4b904 // meta Meter Hash = 0x2ef05 // meter Method Hash = 0x2d906 // method Mglyph Hash = 0x34406 // mglyph Mi Hash = 0x2c02 // mi Min Hash = 0x2c03 // min Mn Hash = 0x2e202 // mn Mo Hash = 0x4dd02 // mo Ms Hash = 0x35902 // ms Mtext Hash = 0x38105 // mtext Multiple Hash = 0x38f08 // multiple Muted Hash = 0x39705 // muted Name Hash = 0xdc04 // name Hash = 0x1303 // nav Nobr Hash = 0x1a04 // nobr Noembed Hash = 0x6c07 // noembed Noframes Hash = 0xa108 // noframes Nohref Hash = 
0xad06 // nohref Noresize Hash = 0x24a08 // noresize Noscript Hash = 0x31908 // noscript Noshade Hash = 0x4e507 // noshade Novalidate Hash = 0x2e30a // novalidate Nowrap Hash = 0x57706 // nowrap Object Hash = 0x2af06 // object Ol Hash = 0x12d02 // ol Onabort Hash = 0x1c207 // onabort Onafterprint Hash = 0x2830c // onafterprint Onbeforeprint Hash = 0x2bd0d // onbeforeprint Onbeforeunload Hash = 0x66a0e // onbeforeunload Onblur Hash = 0x14406 // onblur Oncancel Hash = 0x11708 // oncancel Oncanplay Hash = 0x18c09 // oncanplay Oncanplaythrough Hash = 0x18c10 // oncanplaythrough Onchange Hash = 0x42808 // onchange Onclick Hash = 0x6a407 // onclick Onclose Hash = 0x39c07 // onclose Hash = 0x3b40d // oncontextmenu Oncuechange Hash = 0x3c10b // oncuechange Ondblclick Hash = 0x3cc0a // ondblclick Ondrag Hash = 0x3d606 // ondrag Ondragend Hash = 0x3d609 // ondragend Ondragenter Hash = 0x3df0b // ondragenter Ondragleave Hash = 0x3ea0b // ondragleave Ondragover Hash = 0x3f50a // ondragover Ondragstart Hash = 0x3ff0b // ondragstart Ondrop Hash = 0x41006 // ondrop Ondurationchange Hash = 0x42010 // ondurationchange Onemptied Hash = 0x41709 // onemptied Onended Hash = 0x43007 // onended Onerror Hash = 0x43707 // onerror Onfocus Hash = 0x43e07 // onfocus Onhashchange Hash = 0x45e0c // onhashchange Oninput Hash = 0x46a07 // oninput Oninvalid Hash = 0x47109 // oninvalid Onkeydown Hash = 0x47a09 // onkeydown Onkeypress Hash = 0x4870a // onkeypress Onkeyup Hash = 0x49707 // onkeyup Onload Hash = 0x49e06 // onload Onloadeddata Hash = 0x49e0c // onloadeddata Onloadedmetadata Hash = 0x4b110 // onloadedmetadata Onloadstart Hash = 0x4c70b // onloadstart Onmessage Hash = 0x4d209 // onmessage Onmousedown Hash = 0x4db0b // onmousedown Onmousemove Hash = 0x4f80b // onmousemove Onmouseout Hash = 0x5030a // onmouseout Onmouseover Hash = 0x5100b // onmouseover Onmouseup Hash = 0x51b09 // onmouseup Onmousewheel Hash = 0x5240c // onmousewheel Onoffline Hash = 0x53009 // onoffline Ononline Hash = 
0x53b08 // ononline Onpagehide Hash = 0x5430a // onpagehide Onpageshow Hash = 0x5570a // onpageshow Onpause Hash = 0x56307 // onpause Onplay Hash = 0x59006 // onplay Onplaying Hash = 0x59009 // onplaying Onpopstate Hash = 0x5990a // onpopstate Onprogress Hash = 0x5a30a // onprogress Onratechange Hash = 0x5b70c // onratechange Onreset Hash = 0x5c307 // onreset Onresize Hash = 0x5ca08 // onresize Onscroll Hash = 0x5d208 // onscroll Onseeked Hash = 0x5dd08 // onseeked Onseeking Hash = 0x5e509 // onseeking Onselect Hash = 0x5ee08 // onselect Onshow Hash = 0x5f806 // onshow Onstalled Hash = 0x60309 // onstalled Onstorage Hash = 0x60c09 // onstorage Onsubmit Hash = 0x61508 // onsubmit Onsuspend Hash = 0x62909 // onsuspend Ontimeupdate Hash = 0x4520c // ontimeupdate Onunload Hash = 0x63208 // onunload Onvolumechange Hash = 0x63a0e // onvolumechange Onwaiting Hash = 0x64809 // onwaiting Open Hash = 0x57404 // open Optgroup Hash = 0x12008 // optgroup Optimum Hash = 0x65107 // optimum Option Hash = 0x66606 // option Output Hash = 0x50a06 // output P Hash = 0xc01 // p Param Hash = 0xc05 // param Pattern Hash = 0x9b07 // pattern Pauseonexit Hash = 0x5650b // pauseonexit Ping Hash = 0xe704 // ping Placeholder Hash = 0x1270b // placeholder Plaintext Hash = 0x17d09 // plaintext Poster Hash = 0x1fb06 // poster Pre Hash = 0x30d03 // pre Preload Hash = 0x30d07 // preload Profile Hash = 0x34f07 // profile Progress Hash = 0x5a508 // progress Prompt Hash = 0x66006 // prompt Public Hash = 0x57c06 // public Q Hash = 0x8d01 // q Radiogroup Hash = 0x30a // radiogroup Rb Hash = 0x1d02 // rb Readonly Hash = 0x38708 // readonly Rel Hash = 0x30e03 // rel Required Hash = 0x8b08 // required Rev Hash = 0x29303 // rev Reversed Hash = 0x29308 // reversed Rows Hash = 0x6604 // rows Rowspan Hash = 0x6607 // rowspan Rp Hash = 0x28902 // rp Rt Hash = 0x1c702 // rt Rtc Hash = 0x1c703 // rtc Ruby Hash = 0xf304 // ruby Rules Hash = 0x13105 // rules S Hash = 0x3d01 // s Samp Hash = 0x9804 // samp Sandbox 
Hash = 0x13507 // sandbox Scope Hash = 0x35a05 // scope Scoped Hash = 0x35a06 // scoped Script Hash = 0x31b06 // script Scrolling Hash = 0x5d409 // scrolling Seamless Hash = 0x3a108 // seamless Section Hash = 0x13f07 // section Select Hash = 0x5f006 // select Selected Hash = 0x5f008 // selected Shape Hash = 0x23005 // shape Size Hash = 0x24e04 // size Sizes Hash = 0x24e05 // sizes Small Hash = 0x23a05 // small Sortable Hash = 0x25208 // sortable Source Hash = 0x26506 // source Spacer Hash = 0x37106 // spacer Span Hash = 0x6904 // span Spellcheck Hash = 0x3a80a // spellcheck Src Hash = 0x44403 // src Srcdoc Hash = 0x44406 // srcdoc Srclang Hash = 0x49007 // srclang Start Hash = 0x40505 // start Step Hash = 0x65d04 // step Strike Hash = 0x5ac06 // strike Strong Hash = 0x67c06 // strong Style Hash = 0x68205 // style Sub Hash = 0x61703 // sub Summary Hash = 0x68707 // summary Sup Hash = 0x68e03 // sup Svg Hash = 0x69103 // svg System Hash = 0x69406 // system Tabindex Hash = 0x4bf08 // tabindex Table Hash = 0x25505 // table Target Hash = 0x2f806 // target Tbody Hash = 0x3f05 // tbody Td Hash = 0xaa02 // td Text Hash = 0x18204 // text Textarea Hash = 0x38208 // textarea Tfoot Hash = 0xed05 // tfoot Th Hash = 0x19502 // th Thead Hash = 0x36a05 // thead Time Hash = 0x2ed04 // time Title Hash = 0x16605 // title Tr Hash = 0x18502 // tr Track Hash = 0x18505 // track Translate Hash = 0x22009 // translate Truespeed Hash = 0x27209 // truespeed Tt Hash = 0x9d02 // tt Type Hash = 0x10f04 // type Typemustmatch Hash = 0x1e00d // typemustmatch U Hash = 0xb01 // u Ul Hash = 0x5802 // ul Undeterminate Hash = 0x250d // undeterminate Usemap Hash = 0x15d06 // usemap Valign Hash = 0x1506 // valign Value Hash = 0x10a05 // value Valuetype Hash = 0x10a09 // valuetype Var Hash = 0x32f03 // var Video Hash = 0x6a005 // video Visible Hash = 0x6ab07 // visible Vlink Hash = 0x6b205 // vlink Wbr Hash = 0x56003 // wbr Width Hash = 0x5fd05 // width Wrap Hash = 0x57904 // wrap Xmlns Hash = 0x13b05 // 
xmlns Xmp Hash = 0x17b03 // xmp )
These are unique hash definitions to be used instead of strings.
func ToHash ¶
ToHash returns the hash whose name is s. It returns zero if there is no such hash. It is case sensitive.
func (Hash) String ¶
String returns the hash's name.
type Lexer ¶
type Lexer struct {
// contains filtered or unexported fields
}
Lexer is the state for the lexer.
func NewLexer ¶
NewLexer returns a new Lexer for a given io.Reader.
Example ¶
Code, followed by its expected output:
{
l := NewLexer(bytes.NewBufferString("<span class='user'>John Doe</span>"))
out := ""
for {
tt, data := l.Next()
if tt == ErrorToken {
break
}
out += string(data)
l.Free(len(data))
}
fmt.Println(out)
// Output: <span class='user'>John Doe</span>
}
<span class='user'>John Doe</span>
func (*Lexer) AttrVal ¶
AttrVal returns the attribute value when an AttributeToken was returned from Next.
func (*Lexer) Err ¶
Err returns the error encountered during lexing. This is often io.EOF, but other errors can be returned as well.
func (*Lexer) Free ¶
Free frees up bytes of length n from previously shifted tokens.
func (*Lexer) Next ¶
Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
func (*Lexer) Text ¶
Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters.
type TokenType ¶
type TokenType uint32
TokenType determines the type of token, e.g. a start tag, an attribute, or a comment.
const ( ErrorToken TokenType = iota // extra token when errors occur CommentToken DoctypeToken StartTagToken StartTagCloseToken StartTagVoidToken EndTagToken AttributeToken TextToken )
TokenType values.
func (TokenType) String ¶
String returns the string representation of a TokenType.
Source Files ¶
- Version
- v2.0.0+incompatible
- Published
- Apr 10, 2016
- Platform
- darwin/amd64
- Imports
- 4 packages
- Last checked
- 2 hours ago –
Tools for package owners.