Idiomatically determine all the characters that can be used for symbols: Difference between revisions
Content added Content deleted
(added Ol) |
(→{{header|Go}}: Replace with really idiomatic code; the previous code had too many magic numbers) |
||
Line 70:
=={{header|Go}}==
Most of the code is concerned with collecting and printing the Unicode ranges of the valid characters. The remaining part hands each candidate string to the standard <code>go/parser</code> package and accepts it only if it parses as a single identifier whose name is exactly that string.
<lang go>package main
import (
"fmt"
"go/ast"
"go/parser"
"strings"
"unicode"
)
// runeRanges collects code points passed to add and collapses runs of
// consecutive code points into ranges for display. The zero value is an
// empty collector that is ready to use.
type runeRanges struct {
	ranges   []string // completed ranges, already formatted with %U
	hasStart bool     // reports whether start/end describe a pending range
	start    rune     // first code point of the pending range
	end      rune     // last code point of the pending range
}

// add records cp. If cp is exactly one past the end of the pending range,
// that range is extended; otherwise the pending range (if any) is flushed
// to r.ranges and a new single-element range starting at cp is opened.
func (r *runeRanges) add(cp rune) {
	if !r.hasStart {
		r.hasStart = true
		r.start = cp
		r.end = cp
		return
	}
	if cp == r.end+1 {
		r.end = cp
		return
	}
	r.writeTo(&r.ranges)
	r.start = cp
	r.end = cp
}

// writeTo appends the formatted pending range to *ranges. A single code
// point is rendered as "U+XXXX" and a longer run as "U+XXXX-U+YYYY".
// Nothing is appended when no code point has been added yet.
func (r *runeRanges) writeTo(ranges *[]string) {
	if !r.hasStart {
		return
	}
	if r.start == r.end {
		*ranges = append(*ranges, fmt.Sprintf("%U", r.end))
		return
	}
	*ranges = append(*ranges, fmt.Sprintf("%U-%U", r.start, r.end))
}

// String returns every range collected so far, including the pending one,
// joined by ", ". It does not modify the receiver.
func (r *runeRanges) String() string {
	// Build the result in a freshly allocated slice. Appending through a
	// plain copy of r.ranges would write the pending range into the spare
	// capacity of the receiver's own backing array.
	out := make([]string, 0, len(r.ranges)+1)
	out = append(out, r.ranges...)
	r.writeTo(&out)
	return strings.Join(out, ", ")
}
// isValidIdentifier reports whether identifier, parsed as a Go expression,
// is a single identifier whose name is exactly the input string.
func isValidIdentifier(identifier string) bool {
	expr, parseErr := parser.ParseExpr(identifier)
	if parseErr != nil {
		return false
	}
	// Comparing the name with the input rejects strings for which the
	// parser produced an identifier that does not cover the whole input.
	if id, isIdent := expr.(*ast.Ident); isIdent {
		return id.Name == identifier
	}
	return false
}
// main tests every code point from 0 through unicode.MaxRune, both on its
// own (as the first character of an identifier) and after an underscore
// (as a following character), then prints the resulting ranges.
func main() {
	var (
		firstSet      runeRanges
		followSet     runeRanges
		followOnlySet runeRanges
	)
	for cp := rune(0); cp <= unicode.MaxRune; cp++ {
		asFirst := isValidIdentifier(string(cp))
		asFollow := isValidIdentifier("_" + string(cp))
		if asFirst {
			firstSet.add(cp)
		}
		if !asFollow {
			continue
		}
		followSet.add(cp)
		if !asFirst {
			followOnlySet.add(cp)
		}
	}
	fmt.Println("Valid first:", firstSet.String())
	fmt.Println("Valid follow:", followSet.String())
	fmt.Println("Only follow:", followOnlySet.String())
}</lang>
{{out}}
<pre>
Valid first: U+0041-U+005A, U+005F, U+0061-U+007A, U+00AA, ..., U+00F8-U+02C1, U+02C6-U+02D1, ...
Valid follow: U+0030-U+0039, U+0041-U+005A, U+005F, U+0061-U+007A, U+00AA, ..., U+00F8-U+02C1, ..., U+2CEB0-U+2EBE0, U+2F800-U+2FA1D
Only follow: U+0030-U+0039, U+0660-U+0669, U+06F0-U+06F9, U+07C0-U+07C9, ..., U+1D7CE-U+1D7FF, U+1E950-U+1E959
</pre>
|