From 67571c49c119993ce553bec68c87e59332b5e03c Mon Sep 17 00:00:00 2001 From: Feuerfuchs Date: Sat, 16 Nov 2019 09:59:03 +0100 Subject: Gemini: Add support for charset MIME parameter --- go.mod | 2 ++ go.sum | 1 + gopherproxy.go | 48 +++++++++++++++++++++++++++++------------------- libgemini.go | 14 ++++++++------ 4 files changed, 40 insertions(+), 25 deletions(-) diff --git a/go.mod b/go.mod index b8e8baf..503bb87 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,8 @@ require ( github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 github.com/prologic/go-gopher v0.0.0-20181230133552-0c68ed5f58b0 github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea + golang.org/x/net v0.0.0-20190311183353-d8887717615a + golang.org/x/text v0.3.0 ) go 1.13 diff --git a/go.sum b/go.sum index 544eee6..7f255e0 100644 --- a/go.sum +++ b/go.sum @@ -83,6 +83,7 @@ golang.org/x/sync v0.0.0-20190412183630-56d357773e84/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20190329151228-23e29df326fe/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190404132500-923d25813098 h1:MtqjsZmyGRgMmLUgxnmMJ6RYdvd2ib8ipiayHhqSxs4= diff --git a/gopherproxy.go b/gopherproxy.go index ab8e906..65c8f89 100644 --- a/gopherproxy.go +++ b/gopherproxy.go @@ -10,11 +10,15 @@ import ( "io" "io/ioutil" "log" + "mime" "net/http" "net/url" "regexp" "strings" + "golang.org/x/net/html/charset" + "golang.org/x/text/transform" + "github.com/temoto/robotstxt" "github.com/prologic/go-gopher" @@ -133,7 +137,7 @@ func renderGopherDirectory(w http.ResponseWriter, tpl *template.Template, assetL }{title, fmt.Sprintf("%s/%s", hostport, uri), assetList, out, "", false, "gopher"}) } -func parseGeminiDocument(response *GeminiResponse, uri string, hostport string) (items []Item) { +func parseGeminiDocument(body *bytes.Buffer, uri string, hostport string) (items []Item) { baseUrl, err := url.Parse(fmt.Sprintf( "gemini://%s/%s", hostport, @@ -143,8 +147,7 @@ func parseGeminiDocument(response *GeminiResponse, uri string, hostport string) return []Item{} } - scanner := bufio.NewScanner(response.Body) - scanner.Split(bufio.ScanLines) + scanner := bufio.NewScanner(body) for scanner.Scan() { line := strings.Trim(scanner.Text(), "\r\n") @@ -380,25 +383,32 @@ func GeminiHandler(tpl *template.Template, robotsdata *robotstxt.RobotsData, ass } if strings.HasPrefix(res.Header.Meta, "text/") { - // TODO: Handle encoding - // https://stackoverflow.com/questions/32518432/how-to-convert-from-an-encoding-to-utf-8-in-go + buf := new(bytes.Buffer) + + _, params, err := mime.ParseMediaType(res.Header.Meta) + if err != nil { + buf.ReadFrom(res.Body) + } else { + encoding, _ := charset.Lookup(params["charset"]) + readbuf := new(bytes.Buffer) + readbuf.ReadFrom(res.Body) + + writer := transform.NewWriter(buf, encoding.NewDecoder()) + writer.Write(readbuf.Bytes()) + writer.Close() + } + + var ( + rawText string + items []Item + ) if strings.HasPrefix(res.Header.Meta, MIME_GEMINI) { - items := parseGeminiDocument(res, uri, hostport) - tpl.Execute(w, struct { - Title string - URI string - Assets AssetList - RawText string - Lines []Item - Error bool - Protocol string - }{uri, fmt.Sprintf("%s/%s", hostport, uri), assetList, "", items, false, "gemini"}) - return + items = parseGeminiDocument(buf, uri, hostport) + } else { + rawText = buf.String() } - buf := new(bytes.Buffer) - buf.ReadFrom(res.Body) tpl.Execute(w, struct { Title string URI string @@ -407,7 +417,7 @@ func GeminiHandler(tpl *template.Template, robotsdata *robotstxt.RobotsData, ass Lines []Item Error bool Protocol string - }{uri, fmt.Sprintf("%s/%s", hostport, uri), assetList, buf.String(), nil, false, "gemini"}) + }{uri, fmt.Sprintf("%s/%s", hostport, uri), assetList, rawText, items, false, "gemini"}) } else { io.Copy(w, res.Body) } diff --git a/libgemini.go b/libgemini.go index 56e1463..20fc137 100644 --- a/libgemini.go +++ b/libgemini.go @@ -5,6 +5,7 @@ import ( "crypto/tls" "errors" "io" + "mime" "net/url" "regexp" "strconv" @@ -134,13 +135,14 @@ func ParseGeminiHeader(line string) (header *GeminiHeader, err error) { meta := matches[2] if int(status/10) == 2 { - if meta == "" { - meta = DEFAULT_MIME + ";charset=" + DEFAULT_CHARSET - } + mediaType, params, err := mime.ParseMediaType(meta) - mimeType := MimeTypePattern.FindString(meta) - if strings.HasPrefix(mimeType, "text/") && MimeCharsetPattern.FindString(meta) == "" { - meta += ";charset=" + DEFAULT_CHARSET + if err != nil { + meta = DEFAULT_MIME + ";charset=" + DEFAULT_CHARSET + } else if strings.HasPrefix(mediaType, "text/") { + if _, ok := params["charset"]; !ok { + meta += ";charset=" + DEFAULT_CHARSET + } } } -- cgit v1.2.3-70-g09d2