package html
import "github.com/tdewolff/parse/v2/html"
Package html is an HTML5 lexer following the specifications at http://www.w3.org/TR/html5/syntax.html.
Index ¶
- func EscapeAttrVal(buf *[]byte, orig, b []byte, isXML bool) []byte
- type Hash
- type Lexer
- func NewLexer(r io.Reader) *Lexer
- func (l *Lexer) AttrVal() []byte
- func (l *Lexer) Err() error
- func (l *Lexer) Next() (TokenType, []byte)
- func (l *Lexer) Restore()
- func (l *Lexer) Text() []byte
- type TokenType
Examples ¶
Functions ¶
func EscapeAttrVal ¶
EscapeAttrVal returns the escaped attribute value bytes without quotes.
Types ¶
type Hash ¶
type Hash uint32
Hash defines perfect hashes for a predefined list of strings.
const ( A Hash = 0x1 // a Abbr Hash = 0x36904 // abbr About Hash = 0x5 // about Accept Hash = 0x1106 // accept Accept_Charset Hash = 0x110e // accept-charset Action Hash = 0x25c06 // action Address Hash = 0x5a07 // address Align Hash = 0x15105 // align Alink Hash = 0x8f05 // alink Allowfullscreen Hash = 0x2340f // allowfullscreen Area Hash = 0x12c04 // area Article Hash = 0x2707 // article Aside Hash = 0x7205 // aside Async Hash = 0xcb05 // async Audio Hash = 0xdc05 // audio Autofocus Hash = 0xfc09 // autofocus Autoplay Hash = 0x11508 // autoplay Axis Hash = 0x11d04 // axis B Hash = 0x101 // b Background Hash = 0x300a // background Base Hash = 0x18104 // base Bb Hash = 0x36a02 // bb Bdi Hash = 0x9403 // bdi Bdo Hash = 0x32f03 // bdo Bgcolor Hash = 0x13507 // bgcolor Blockquote Hash = 0x13f0a // blockquote Body Hash = 0xd04 // body Br Hash = 0x36b02 // br Button Hash = 0x14906 // button Canvas Hash = 0x6e06 // canvas Caption Hash = 0x21e07 // caption Charset Hash = 0x1807 // charset Checked Hash = 0x1ae07 // checked Cite Hash = 0xcf04 // cite Class Hash = 0x16305 // class Classid Hash = 0x16307 // classid Clear Hash = 0x2b05 // clear Code Hash = 0x17d04 // code Codebase Hash = 0x17d08 // codebase Codetype Hash = 0x19f08 // codetype Col Hash = 0x13703 // col Colgroup Hash = 0x1c608 // colgroup Color Hash = 0x13705 // color Cols Hash = 0x1db04 // cols Colspan Hash = 0x1db07 // colspan Compact Hash = 0x1f507 // compact Content Hash = 0x2a207 // content Controls Hash = 0x20008 // controls Data Hash = 0x1f04 // data Datalist Hash = 0x1f08 // datalist Datatype Hash = 0x4d08 // datatype Dd Hash = 0x5b02 // dd Declare Hash = 0x7507 // declare Default Hash = 0x9e07 // default DefaultChecked Hash = 0x1e60e // defaultChecked DefaultMuted Hash = 0x9e0c // defaultMuted DefaultSelected Hash = 0xa90f // defaultSelected Defer Hash = 0xb705 // defer Del Hash = 0xd903 // del Details Hash = 0x16907 // details Dfn Hash = 0x19103 // dfn Dialog Hash = 0xc506 // dialog Dir Hash = 0x9503 
// dir Disabled Hash = 0x1b408 // disabled Div Hash = 0x1bb03 // div Dl Hash = 0x1f302 // dl Dt Hash = 0x24e02 // dt Em Hash = 0x4302 // em Embed Hash = 0x4905 // embed Enabled Hash = 0x28a07 // enabled Enctype Hash = 0x33307 // enctype Face Hash = 0x5604 // face Fieldset Hash = 0x21308 // fieldset Figcaption Hash = 0x21b0a // figcaption Figure Hash = 0x24306 // figure Footer Hash = 0xe606 // footer For Hash = 0x25803 // for Form Hash = 0x25804 // form Formaction Hash = 0x2580a // formaction Formnovalidate Hash = 0x2620e // formnovalidate Frame Hash = 0x2aa05 // frame Frameborder Hash = 0x2aa0b // frameborder H1 Hash = 0x2f502 // h1 H2 Hash = 0x27002 // h2 H3 Hash = 0x27202 // h3 H4 Hash = 0x27402 // h4 H5 Hash = 0x27602 // h5 H6 Hash = 0x27802 // h6 Head Hash = 0x2e704 // head Header Hash = 0x2e706 // header Hgroup Hash = 0x27a06 // hgroup Hidden Hash = 0x28606 // hidden Hr Hash = 0x15702 // hr Href Hash = 0x15704 // href Hreflang Hash = 0x15708 // hreflang Html Hash = 0x29104 // html Http_Equiv Hash = 0x2950a // http-equiv I Hash = 0x2401 // i Icon Hash = 0x2a104 // icon Id Hash = 0x7402 // id Iframe Hash = 0x2a906 // iframe Img Hash = 0x2b503 // img Inert Hash = 0x6905 // inert Inlist Hash = 0x2b806 // inlist Input Hash = 0x2c205 // input Ins Hash = 0x2c703 // ins Ismap Hash = 0x11f05 // ismap Itemscope Hash = 0xd009 // itemscope Kbd Hash = 0x9303 // kbd Label Hash = 0x7c05 // label Lang Hash = 0x15b04 // lang Language Hash = 0x15b08 // language Legend Hash = 0x2da06 // legend Li Hash = 0x2302 // li Link Hash = 0x9004 // link Longdesc Hash = 0x8008 // longdesc Main Hash = 0x6704 // main Manifest Hash = 0x2d108 // manifest Map Hash = 0x10603 // map Mark Hash = 0x2e004 // mark Math Hash = 0x2e404 // math Max Hash = 0x2ed03 // max Maxlength Hash = 0x2ed09 // maxlength Media Hash = 0xc305 // media Menu Hash = 0xf804 // menu Meta Hash = 0x2f704 // meta Meter Hash = 0x30f05 // meter Method Hash = 0x31406 // method Multiple Hash = 0x31a08 // multiple Muted Hash = 0x32205 // 
muted Name Hash = 0xc104 // name Nav Hash = 0x14e03 // nav Nohref Hash = 0x15506 // nohref Noresize Hash = 0x17508 // noresize Noscript Hash = 0x19308 // noscript Noshade Hash = 0x1e107 // noshade Novalidate Hash = 0x2660a // novalidate Nowrap Hash = 0x22406 // nowrap Object Hash = 0xe006 // object Ol Hash = 0x8902 // ol Open Hash = 0x33104 // open Optgroup Hash = 0x34b08 // optgroup Option Hash = 0x32706 // option Output Hash = 0x206 // output P Hash = 0x501 // p Param Hash = 0x6305 // param Pauseonexit Hash = 0xec0b // pauseonexit Picture Hash = 0x10807 // picture Plaintext Hash = 0x12309 // plaintext Poster Hash = 0x1cd06 // poster Pre Hash = 0x22903 // pre Prefix Hash = 0x22906 // prefix Profile Hash = 0x27f07 // profile Progress Hash = 0x30208 // progress Property Hash = 0x35208 // property Q Hash = 0x14401 // q Rb Hash = 0x2f02 // rb Readonly Hash = 0x12d08 // readonly Rel Hash = 0x7a03 // rel Required Hash = 0x24708 // required Resource Hash = 0x10d08 // resource Rev Hash = 0x9703 // rev Reversed Hash = 0x9708 // reversed Rows Hash = 0xbb04 // rows Rowspan Hash = 0xbb07 // rowspan Rp Hash = 0xeb02 // rp Rt Hash = 0x2802 // rt Rtc Hash = 0x6c03 // rtc Ruby Hash = 0x13b04 // ruby Rules Hash = 0x1d205 // rules S Hash = 0x1c01 // s Samp Hash = 0x6004 // samp Scope Hash = 0xd405 // scope Scoped Hash = 0xd406 // scoped Script Hash = 0x19506 // script Scrolling Hash = 0x8609 // scrolling Seamless Hash = 0x18308 // seamless Section Hash = 0x16f07 // section Select Hash = 0x18a06 // select Selected Hash = 0x18a08 // selected Shape Hash = 0x1d605 // shape Size Hash = 0x17904 // size Slot Hash = 0x20704 // slot Small Hash = 0x23205 // small Sortable Hash = 0x2c908 // sortable Source Hash = 0x10f06 // source Span Hash = 0xbe04 // span Src Hash = 0x30903 // src Srcset Hash = 0x30906 // srcset Start Hash = 0x2505 // start Strong Hash = 0x2bc06 // strong Style Hash = 0x2d705 // style Sub Hash = 0x32d03 // sub Summary Hash = 0x33a07 // summary Sup Hash = 0x34103 // sup Svg Hash 
= 0x34403 // svg Tabindex Hash = 0x2f908 // tabindex Table Hash = 0x2cc05 // table Target Hash = 0x706 // target Tbody Hash = 0xc05 // tbody Td Hash = 0x1e02 // td Template Hash = 0x4208 // template Text Hash = 0x12804 // text Textarea Hash = 0x12808 // textarea Tfoot Hash = 0xe505 // tfoot Th Hash = 0x2e602 // th Thead Hash = 0x2e605 // thead Time Hash = 0xf604 // time Title Hash = 0x19a05 // title Tr Hash = 0x1fb02 // tr Track Hash = 0x1fb05 // track Translate Hash = 0x20a09 // translate Truespeed Hash = 0x24f09 // truespeed Type Hash = 0x5104 // type Typemustmatch Hash = 0x1a30d // typemustmatch Typeof Hash = 0x5106 // typeof U Hash = 0x301 // u Ul Hash = 0xa202 // ul Undeterminate Hash = 0x370d // undeterminate Usemap Hash = 0x10306 // usemap Valign Hash = 0x15006 // valign Value Hash = 0x1bd05 // value Valuetype Hash = 0x1bd09 // valuetype Var Hash = 0x29e03 // var Video Hash = 0x34705 // video Visible Hash = 0x35a07 // visible Vlink Hash = 0x36105 // vlink Vocab Hash = 0x36605 // vocab Wbr Hash = 0x36d03 // wbr Xmlns Hash = 0x22e05 // xmlns Xmp Hash = 0x30003 // xmp )
Unique hash definitions to be used instead of strings.
func ToHash ¶
ToHash returns the hash whose name is s. It returns zero if there is no such hash. It is case sensitive.
func (Hash) String ¶
String returns the hash's name.
type Lexer ¶
type Lexer struct {
// contains filtered or unexported fields
}
Lexer is the state for the lexer.
func NewLexer ¶
NewLexer returns a new Lexer for a given io.Reader.
Example ¶
Code:
{
l := NewLexer(bytes.NewBufferString("<span class='user'>John Doe</span>"))
out := ""
for {
tt, data := l.Next()
if tt == ErrorToken {
break
}
out += string(data)
}
fmt.Println(out)
// Output: <span class='user'>John Doe</span>
}
Output:
<span class='user'>John Doe</span>
func (*Lexer) AttrVal ¶
AttrVal returns the attribute value when an AttributeToken was returned from Next.
func (*Lexer) Err ¶
Err returns the error encountered during lexing. This is often io.EOF, but other errors can be returned as well.
func (*Lexer) Next ¶
Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
func (*Lexer) Restore ¶
func (l *Lexer) Restore()
Restore restores the NULL byte at the end of the buffer.
func (*Lexer) Text ¶
Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters.
type TokenType ¶
type TokenType uint32
TokenType determines the type of token, e.g. a start tag or a piece of text.
const (
	ErrorToken TokenType = iota // extra token when errors occur
	CommentToken
	DoctypeToken
	StartTagToken
	StartTagCloseToken
	StartTagVoidToken
	EndTagToken
	AttributeToken
	TextToken
	SvgToken
	MathToken
)
TokenType values.
func (TokenType) String ¶
String returns the string representation of a TokenType.
Source Files ¶
- Version
- v2.3.10
- Published
- Nov 20, 2019
- Platform
- js/wasm
- Imports
- 4 packages
- Last checked
- 8 hours ago –
Tools for package owners.