From 97dc50bcfb7d4296dd11348b9d7470821fb6afbc Mon Sep 17 00:00:00 2001 From: Feuerfuchs Date: Mon, 18 May 2020 14:14:25 +0200 Subject: WIP: Refactoring --- internal/port/gemini.go | 176 +++++++++++++++++++++++++++++++++++---------- internal/port/gopher.go | 2 +- internal/port/main.go | 2 + pkg/libgemini/libgemini.go | 165 +++++++++++++++++++++++++++++++++++++++++- 4 files changed, 303 insertions(+), 42 deletions(-) diff --git a/internal/port/gemini.go b/internal/port/gemini.go index f9b0b97..b10da7d 100644 --- a/internal/port/gemini.go +++ b/internal/port/gemini.go @@ -1,6 +1,7 @@ package port import ( + "bufio" "bytes" "fmt" "html/template" @@ -15,11 +16,32 @@ import ( "golang.org/x/net/html/charset" "golang.org/x/text/transform" - "git.vulpes.one/Feuerfuchs/port/port/libgemini" + "git.vulpes.one/Feuerfuchs/port/pkg/libgemini" "github.com/temoto/robotstxt" ) +type SectionType byte + +const ( + RAW_TEXT = SectionType(0) + REFLOW_TEXT = SectionType(1) + LINK = SectionType(2) +) + +type Section struct { + Type SectionType + Text string + URL template.URL +} + +type templateVariables struct { + Title string + URI string + Assets AssetList + Sections []Section +} + var ( TermEscapeSGRPattern = regexp.MustCompile("\\[\\d+(;\\d+)*m") ) @@ -53,6 +75,80 @@ func resolveURI(uri string, baseURL *url.URL) (resolvedURI string) { return } +func parseGeminiDocument(body *bytes.Buffer, uri string, hostport string) (sections []Section) { + baseURL, err := url.Parse(fmt.Sprintf( + "gemini://%s/%s", + hostport, + uri, + )) + if err != nil { + return []Section{} + } + + skipSection := true + + section := Section{ + Type: RAW_TEXT, + } + + scanner := bufio.NewScanner(body) + + for scanner.Scan() { + line := strings.Trim(scanner.Text(), "\r\n") + line = TermEscapeSGRPattern.ReplaceAllString(line, "") + + linkMatch := libgemini.LinkPattern.FindStringSubmatch(line) + if len(linkMatch) != 0 && linkMatch[0] != "" { + curType := section.Type + + if !skipSection { + sections = append(sections, section) + } + + label := linkMatch[2] + if label == "" { + label = linkMatch[1] + } + + sections = append(sections, Section{ + Type: LINK, + Text: label, + URL: template.URL(resolveURI(linkMatch[1], baseURL)), + }) + + skipSection = false + section = Section{ + Type: curType, + } + } else { + reflowModeMatch := libgemini.ReflowModePattern.FindStringSubmatch(line) + if len(reflowModeMatch) != 0 { + newType := RAW_TEXT + if section.Type == RAW_TEXT { + newType = REFLOW_TEXT + } + + if !skipSection { + sections = append(sections, section) + } + + skipSection = false + section = Section{ + Type: newType, + } + } else { + section.Text = section.Text + "\n" + line + } + } + } + + if !skipSection { + sections = append(sections, section) + } + + return +} + func GeminiHandler(tpl *template.Template, robotsdata *robotstxt.RobotsData, assetList AssetList, robotsdebug bool) http.HandlerFunc { return func(w http.ResponseWriter, req *http.Request) { agent := req.UserAgent() @@ -80,13 +176,14 @@ func GeminiHandler(tpl *template.Template, robotsdata *robotstxt.RobotsData, ass uri, err := url.QueryUnescape(strings.Join(parts[1:], "/")) if err != nil { - if e := tpl.Execute(w, TemplateVariables{ - Title: title, - URI: hostport, - Assets: assetList, - RawText: fmt.Sprintf("Error: %s", err), - Error: true, - Protocol: "gemini", + if e := tpl.Execute(w, templateVariables{ + Title: title, + URI: hostport, + Assets: assetList, + Sections: []Section{{ + Type: RAW_TEXT, + Text: fmt.Sprintf("Error: %s", err), + }}, }); e != nil { log.Println("Template error: " + e.Error()) log.Println(err.Error()) @@ -108,13 +205,14 @@ func GeminiHandler(tpl *template.Template, robotsdata *robotstxt.RobotsData, ass ) if err != nil { - if e := tpl.Execute(w, TemplateVariables{ - Title: title, - URI: fmt.Sprintf("%s/%s", hostport, uri), - Assets: assetList, - RawText: fmt.Sprintf("Error: %s", err), - Error: true, - Protocol: "gemini", + if e := tpl.Execute(w, templateVariables{ + Title: title, + URI: fmt.Sprintf("%s/%s", hostport, uri), + Assets: assetList, + Sections: []Section{{ + Type: RAW_TEXT, + Text: fmt.Sprintf("Error: %s", err), + }}, }); e != nil { log.Println("Template error: " + e.Error()) log.Println(err.Error()) @@ -129,13 +227,14 @@ func GeminiHandler(tpl *template.Template, robotsdata *robotstxt.RobotsData, ass uri, )) if err != nil { - if e := tpl.Execute(w, TemplateVariables{ - Title: title, - URI: fmt.Sprintf("%s/%s", hostport, uri), - Assets: assetList, - RawText: fmt.Sprintf("Error: %s", err), - Error: true, - Protocol: "gemini", + if e := tpl.Execute(w, templateVariables{ + Title: title, + URI: fmt.Sprintf("%s/%s", hostport, uri), + Assets: assetList, + Sections: []Section{{ + Type: RAW_TEXT, + Text: fmt.Sprintf("Error: %s", err), + }}, }); e != nil { log.Println("Template error: " + e.Error()) log.Println(err.Error()) @@ -148,13 +247,14 @@ func GeminiHandler(tpl *template.Template, robotsdata *robotstxt.RobotsData, ass } if int(res.Header.Status/10) != 2 { - if err := tpl.Execute(w, TemplateVariables{ - Title: title, - URI: fmt.Sprintf("%s/%s", hostport, uri), - Assets: assetList, - RawText: fmt.Sprintf("Error %d: %s", res.Header.Status, res.Header.Meta), - Error: true, - Protocol: "gemini", + if err := tpl.Execute(w, templateVariables{ + Title: title, + URI: fmt.Sprintf("%s/%s", hostport, uri), + Assets: assetList, + Sections: []Section{{ + Type: RAW_TEXT, + Text: fmt.Sprintf("Error %d: %s", res.Header.Status, res.Header.Meta), + }}, }); err != nil { log.Println("Template error: " + err.Error()) } @@ -177,24 +277,22 @@ func GeminiHandler(tpl *template.Template, robotsdata *robotstxt.RobotsData, ass writer.Close() } - var ( - rawText string - items []Item - ) + var sections []Section if strings.HasPrefix(res.Header.Meta, libgemini.MIME_GEMINI) { - items = parseGeminiDocument(buf, uri, hostport) + sections = parseGeminiDocument(buf, uri, hostport) } else { - rawText = buf.String() + sections = append(sections, Section{ + Type: RAW_TEXT, + Text: buf.String(), + }) } - if err := tpl.Execute(w, TemplateVariables{ + if err := tpl.Execute(w, templateVariables{ Title: title, URI: fmt.Sprintf("%s/%s", hostport, uri), Assets: assetList, - Lines: items, - RawText: rawText, - Protocol: "gemini", + Sections: sections, }); err != nil { log.Println("Template error: " + err.Error()) } diff --git a/internal/port/gopher.go b/internal/port/gopher.go index ebeb213..abbc4d9 100644 --- a/internal/port/gopher.go +++ b/internal/port/gopher.go @@ -11,7 +11,7 @@ import ( "net/url" "strings" - "git.vulpes.one/Feuerfuchs/port/port/libgopher" + "git.vulpes.one/Feuerfuchs/port/pkg/libgopher" "github.com/davidbyttow/govips/pkg/vips" "github.com/temoto/robotstxt" diff --git a/internal/port/main.go b/internal/port/main.go index 5cdd794..9fa245e 100644 --- a/internal/port/main.go +++ b/internal/port/main.go @@ -205,6 +205,8 @@ func ListenAndServe(bind, startpagefile string, robotsfile string, robotsdebug b // // + var templates *template.Template + var allFiles []string files, err := ioutil.ReadDir("./tpl") if err != nil { diff --git a/pkg/libgemini/libgemini.go b/pkg/libgemini/libgemini.go index 303490c..71012ef 100644 --- a/pkg/libgemini/libgemini.go +++ b/pkg/libgemini/libgemini.go @@ -2,9 +2,11 @@ package libgemini import ( "bufio" + "bytes" "crypto/tls" "errors" "fmt" + "html/template" "io" "mime" "net" @@ -49,8 +51,14 @@ const ( ) var ( - HeaderPattern = regexp.MustCompile("^(\\d\\d)[ \\t]+(.*)$") - LinkPattern = regexp.MustCompile("^=>[ \\t]*([^ \\t]+)(?:[ \\t]+(.*))?$") + HeaderPattern = regexp.MustCompile("^(\\d\\d)[ \\t]+(.*)$") + LinkPattern = regexp.MustCompile("^=>[ \\t]*([^ \\t]+)(?:[ \\t]+(.*))?$") + ReflowModePattern = regexp.MustCompile("^```(.*)$") + Heading1Pattern = regexp.MustCompile("^#(.*)$") + Heading2Pattern = regexp.MustCompile("^##(.*)$") + Heading3Pattern = regexp.MustCompile("^###(.*)$") + ListItemPattern = regexp.MustCompile("^\\*(.*)$") + TermEscapeSGRPattern = regexp.MustCompile("\\[\\d+(;\\d+)*m") ) type Header struct { @@ -63,6 +71,25 @@ type Response struct { Body io.Reader } +type GeminiDocSectionType byte + +const ( + RAW_TEXT = SectionType(0) + REFLOW_TEXT = SectionType(1) + LINK = SectionType(2) + HEADING_1 = SectionType(3) + HEADING_2 = SectionType(4) + HEADING_3 = SectionType(5) + LIST = SectionType(6) +) + +type GeminiDocSection struct { + Type SectionType + Text string + URL template.URL + Items []string +} + func Get(uri string) (*Response, error) { u, err := url.Parse(uri) if err != nil { @@ -143,3 +170,137 @@ func ParseHeader(line string) (header *Header, err error) { return } + +func ParseGeminiDocument(body *bytes.Buffer) (sections []Section) { + scanner := bufio.NewScanner(body) + + reflow := true + ignoreSection := true + section := Section{ + Type: REFLOW_TEXT + } + + for scanner.Scan() { + line := strings.Trim(scanner.Text(), CRLF) + line = TermEscapeSGRPattern.ReplaceAllString(line, "") + + reflowMatch := ReflowModePattern.FindStringSubmatch(line) + if len(heading3Match) != 0 { + reflow = !reflow + continue + } + + if !reflow { + if !ignoreSection { + if section.Type != REFLOW_TEXT { + sections = append(sections, section) + section = Section{ + Type: REFLOW_TEXT + } + } + } else { + ignoreSection = false + section = Section{ + Type: REFLOW_TEXT + } + } + + section.Text = section.Text + "\n" + line + + continue + } + + linkMatch := LinkPattern.FindStringSubmatch(line) + if len(linkMatch) != 0 && linkMatch[0] != "" { + if !ignoreSection { + sections = append(sections, section) + } + + label := linkMatch[2] + if label == "" { + label = linkMatch[1] + } + + ignoreSection = false + section = Section{ + Type: LINK, + Text: label, + URL: template.URL(resolveURI(linkMatch[1], baseURL)), + } + + continue + } + + heading3Match := Heading3Pattern.FindStringSubmatch(line) + if len(heading3Match) != 0 { + if !ignoreSection { + sections = append(sections, section) + } + + ignoreSection = false + section = Section{ + Type: HEADING_3, + Text: heading3Match[1] + } + + continue + } + + heading2Match := Heading2Pattern.FindStringSubmatch(line) + if len(heading2Match) != 0 { + if !ignoreSection { + sections = append(sections, section) + } + + ignoreSection = false + section = Section{ + Type: HEADING_2, + Text: heading2Match[1] + } + + continue + } + + heading1Match := Heading1Pattern.FindStringSubmatch(line) + if len(heading1Match) != 0 { + if !ignoreSection { + sections = append(sections, section) + } + + ignoreSection = false + section = Section{ + Type: HEADING_1, + Text: heading1Match[1] + } + + continue + } + + listItemMatch := ListItemPattern.FindStringSubmatch(line) + if len(listItemMatch) != 0 { + if !ignoreSection { + if section.Type != LIST { + sections = append(sections, section) + section = Section{ + Type: LIST + } + } + } else { + ignoreSection = false + section = Section{ + Type: LIST, + } + } + + section.Items = append(section.Items, listItemMatch[1]) + + continue + } + } + + if !ignoreSection { + sections = append(sections, section) + } + + return +} -- cgit v1.2.3-70-g09d2