package html
import "github.com/tdewolff/parse/v2/html"
Package html is an HTML5 lexer following the specifications at http://www.w3.org/TR/html5/syntax.html.
Index ¶
- Variables
- func EscapeAttrVal(buf *[]byte, b []byte, origQuote byte, mustQuote bool) []byte
- type Hash
- type Lexer
- func NewLexer(r *parse.Input) *Lexer
- func NewTemplateLexer(r *parse.Input, tmpl [2]string) *Lexer
- func (l *Lexer) AttrKey() []byte
- func (l *Lexer) AttrVal() []byte
- func (l *Lexer) Err() error
- func (l *Lexer) HasTemplate() bool
- func (l *Lexer) Next() (TokenType, []byte)
- func (l *Lexer) Text() []byte
- type TokenType
Examples ¶
Variables ¶
var ASPTemplate = [2]string{"<%", "%>"}
var EJSTemplate = [2]string{"<%", "%>"}
var GoTemplate = [2]string{"{{", "}}"}
var HandlebarsTemplate = [2]string{"{{", "}}"}
var MustacheTemplate = [2]string{"{{", "}}"}
var PHPTemplate = [2]string{"<?", "?>"}
Functions ¶
func EscapeAttrVal ¶
EscapeAttrVal returns the escaped attribute value bytes with quotes. Either single or double quotes are used, whichever is shorter. If there are no quotes present in the value and the value is in HTML (not XML), it will return the value without quotes.
Types ¶
type Hash ¶
type Hash uint32
Hash defines perfect hashes for a predefined list of strings.
const (
	Iframe    Hash = 0x6    // iframe
	Math      Hash = 0x604  // math
	Plaintext Hash = 0x1e09 // plaintext
	Script    Hash = 0xa06  // script
	Style     Hash = 0x1405 // style
	Svg       Hash = 0x1903 // svg
	Textarea  Hash = 0x2308 // textarea
	Title     Hash = 0xf05  // title
	Xmp       Hash = 0x1c03 // xmp
)
Unique hash definitions to be used instead of strings.
func ToHash ¶
ToHash returns the hash whose name is s. It returns zero if there is no such hash. It is case sensitive.
func (Hash) String ¶
String returns the hash's name.
type Lexer ¶
type Lexer struct {
// contains filtered or unexported fields
}
Lexer is the state for the lexer.
func NewLexer ¶
func NewLexer(r *parse.Input) *Lexer
NewLexer returns a new Lexer for a given *parse.Input.
Example ¶
Code:
{
l := NewLexer(parse.NewInputString("<span class='user'>John Doe</span>"))
out := ""
for {
tt, data := l.Next()
if tt == ErrorToken {
break
}
out += string(data)
}
fmt.Println(out)
// Output: <span class='user'>John Doe</span>
}
Output:
<span class='user'>John Doe</span>
func NewTemplateLexer ¶
func (*Lexer) AttrKey ¶
AttrKey returns the attribute key when an AttributeToken was returned from Next.
func (*Lexer) AttrVal ¶
AttrVal returns the attribute value when an AttributeToken was returned from Next.
func (*Lexer) Err ¶
Err returns the error encountered during lexing; this is often io.EOF, but other errors can also be returned.
func (*Lexer) HasTemplate ¶
HasTemplate returns true if the token value contains a template.
func (*Lexer) Next ¶
Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
func (*Lexer) Text ¶
Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters.
type TokenType ¶
type TokenType uint32
TokenType determines the type of token, e.g. a start tag or a comment.
const (
	ErrorToken TokenType = iota // extra token when errors occur
	CommentToken
	DoctypeToken
	StartTagToken
	StartTagCloseToken
	StartTagVoidToken
	EndTagToken
	AttributeToken
	TextToken
	SvgToken
	MathToken
)
TokenType values.
func (TokenType) String ¶
String returns the string representation of a TokenType.
Source Files ¶
- Version
- v2.7.20 (latest)
- Published
- Jan 28, 2025
- Platform
- linux/amd64
- Imports
- 2 packages
- Last checked
- 21 hours ago –
Tools for package owners.