From 97dc50bcfb7d4296dd11348b9d7470821fb6afbc Mon Sep 17 00:00:00 2001 From: Feuerfuchs Date: Mon, 18 May 2020 14:14:25 +0200 Subject: WIP: Refactoring --- pkg/libgemini/libgemini.go | 165 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 163 insertions(+), 2 deletions(-) (limited to 'pkg/libgemini') diff --git a/pkg/libgemini/libgemini.go b/pkg/libgemini/libgemini.go index 303490c..71012ef 100644 --- a/pkg/libgemini/libgemini.go +++ b/pkg/libgemini/libgemini.go @@ -2,9 +2,11 @@ package libgemini import ( "bufio" + "bytes" "crypto/tls" "errors" "fmt" + "html/template" "io" "mime" "net" @@ -49,8 +51,14 @@ const ( ) var ( - HeaderPattern = regexp.MustCompile("^(\\d\\d)[ \\t]+(.*)$") - LinkPattern = regexp.MustCompile("^=>[ \\t]*([^ \\t]+)(?:[ \\t]+(.*))?$") + HeaderPattern = regexp.MustCompile("^(\\d\\d)[ \\t]+(.*)$") + LinkPattern = regexp.MustCompile("^=>[ \\t]*([^ \\t]+)(?:[ \\t]+(.*))?$") + ReflowModePattern = regexp.MustCompile("^```(.*)$") + Heading1Pattern = regexp.MustCompile("^#(.*)$") + Heading2Pattern = regexp.MustCompile("^##(.*)$") + Heading3Pattern = regexp.MustCompile("^###(.*)$") + ListItemPattern = regexp.MustCompile("^\\*(.*)$") + TermEscapeSGRPattern = regexp.MustCompile("\\[\\d+(;\\d+)*m") ) type Header struct { @@ -63,6 +71,25 @@ type Response struct { Body io.Reader } +type GeminiDocSectionType byte + +const ( + RAW_TEXT = SectionType(0) + REFLOW_TEXT = SectionType(1) + LINK = SectionType(2) + HEADING_1 = SectionType(3) + HEADING_2 = SectionType(4) + HEADING_3 = SectionType(5) + LIST = SectionType(6) +) + +type GeminiDocSection struct { + Type SectionType + Text string + URL template.URL + Items []string +} + func Get(uri string) (*Response, error) { u, err := url.Parse(uri) if err != nil { @@ -143,3 +170,137 @@ func ParseHeader(line string) (header *Header, err error) { return } + +func ParseGeminiDocument(body *bytes.Buffer) (sections []Section) { + scanner := bufio.NewScanner(body) + + reflow := true + ignoreSection := true + section := Section{ + Type: REFLOW_TEXT + } + + for scanner.Scan() { + line := strings.Trim(scanner.Text(), CRLF) + line = TermEscapeSGRPattern.ReplaceAllString(line, "") + + reflowMatch := ReflowModePattern.FindStringSubmatch(line) + if len(heading3Match) != 0 { + reflow = !reflow + continue + } + + if !reflow { + if !ignoreSection { + if section.Type != REFLOW_TEXT { + sections = append(sections, section) + section = Section{ + Type: REFLOW_TEXT + } + } + } else { + ignoreSection = false + section = Section{ + Type: REFLOW_TEXT + } + } + + section.Text = section.Text + "\n" + line + + continue + } + + linkMatch := LinkPattern.FindStringSubmatch(line) + if len(linkMatch) != 0 && linkMatch[0] != "" { + if !ignoreSection { + sections = append(sections, section) + } + + label := linkMatch[2] + if label == "" { + label = linkMatch[1] + } + + ignoreSection = false + section = Section{ + Type: LINK, + Text: label, + URL: template.URL(resolveURI(linkMatch[1], baseURL)), + } + + continue + } + + heading3Match := Heading3Pattern.FindStringSubmatch(line) + if len(heading3Match) != 0 { + if !ignoreSection { + sections = append(sections, section) + } + + ignoreSection = false + section = Section{ + Type: HEADING_3, + Text: heading3Match[1] + } + + continue + } + + heading2Match := Heading2Pattern.FindStringSubmatch(line) + if len(heading2Match) != 0 { + if !ignoreSection { + sections = append(sections, section) + } + + ignoreSection = false + section = Section{ + Type: HEADING_2, + Text: heading2Match[1] + } + + continue + } + + heading1Match := Heading1Pattern.FindStringSubmatch(line) + if len(heading1Match) != 0 { + if !ignoreSection { + sections = append(sections, section) + } + + ignoreSection = false + section = Section{ + Type: HEADING_1, + Text: heading1Match[1] + } + + continue + } + + listItemMatch := ListItemPattern.FindStringSubmatch(line) + if len(listItemMatch) != 0 { + if !ignoreSection { + if section.Type != LIST { + sections = append(sections, section) + section = Section{ + Type: LIST + } + } + } else { + ignoreSection = false + section = Section{ + Type: LIST, + } + } + + section.Items = append(section.Items, listItemMatch[1]) + + continue + } + } + + if !ignoreSection { + sections = append(sections, section) + } + + return +} -- cgit v1.2.3-70-g09d2