package libgemini import ( "bufio" "bytes" "crypto/tls" "errors" "fmt" "io" "mime" "net" "net/url" "regexp" "strconv" "strings" ) const ( CRLF = "\r\n" ) const ( STATUS_INPUT = 10 STATUS_SUCCESS = 20 STATUS_SUCCESS_CERT = 21 STATUS_REDIRECT_TEMP = 30 STATUS_REDIRECT_PERM = 31 STATUS_TEMP_FAILURE = 40 STATUS_SERVER_UNAVAILABLE = 41 STATUS_CGI_ERROR = 42 STATUS_PROXY_ERROR = 43 STATUS_SLOW_DOWN = 44 STATUS_PERM_FAILURE = 50 STATUS_NOT_FOUND = 51 STATUS_GONE = 52 STATUS_PROXY_REFUSED = 53 STATUS_BAD_REQUEST = 59 STATUS_CLIENT_CERT_EXPIRED = 60 STATUS_TRANSIENT_CERT_REQUEST = 61 STATUS_AUTH_CERT_REQUIRED = 62 STATUS_CERT_REJECTED = 63 STATUS_FUTURE_CERT_REJECTED = 64 STATUS_EXPIRED_CERT_REJECTED = 65 ) const ( MIME_GEMINI = "text/gemini" DEFAULT_MIME = MIME_GEMINI DEFAULT_CHARSET = "utf-8" ) var ( HeaderPattern = regexp.MustCompile("^(\\d\\d)[ \\t]+(.*)$") LinkPattern = regexp.MustCompile("^=>[ \\t]*([^ \\t]+)(?:[ \\t]+(.*))?$") ReflowModePattern = regexp.MustCompile("^```(.*)$") Heading1Pattern = regexp.MustCompile("^#(.*)$") Heading2Pattern = regexp.MustCompile("^##(.*)$") Heading3Pattern = regexp.MustCompile("^###(.*)$") ListItemPattern = regexp.MustCompile("^\\*(.*)$") TermEscapeSGRPattern = regexp.MustCompile("\\[\\d+(;\\d+)*m") ) type Header struct { Status int Meta string } type Response struct { Header *Header Body io.Reader } type GeminiDocSectionType byte const ( RAW_TEXT = GeminiDocSectionType(0) REFLOW_TEXT = GeminiDocSectionType(1) LINK = GeminiDocSectionType(2) HEADING_1 = GeminiDocSectionType(3) HEADING_2 = GeminiDocSectionType(4) HEADING_3 = GeminiDocSectionType(5) LIST = GeminiDocSectionType(6) ) type GeminiDocSection struct { Type GeminiDocSectionType Text string URL string Items []string } func Get(uri string) (*Response, error) { u, err := url.Parse(uri) if err != nil { return nil, err } if u.Scheme != "gemini" { return nil, errors.New("invalid scheme for uri") } host := u.Hostname() port := u.Port() if port == "" { port = "1965" } conn, err := tls.Dial("tcp", net.JoinHostPort(host, port), &tls.Config{ MinVersion: tls.VersionTLS12, InsecureSkipVerify: true, }) if err != nil { return nil, err } _, err = conn.Write([]byte(u.String() + CRLF)) if err != nil { conn.Close() return nil, err } reader := bufio.NewReader(conn) line, _, err := reader.ReadLine() if err != nil { conn.Close() return nil, err } header, err := ParseHeader(string(line)) if err != nil { conn.Close() return nil, err } return &Response{ Header: header, Body: reader, }, nil } func ParseHeader(line string) (header *Header, err error) { matches := HeaderPattern.FindStringSubmatch(line) status, err := strconv.Atoi(matches[1]) if err != nil { return nil, err } meta := matches[2] if int(status/10) == 2 { mediaType, params, err := mime.ParseMediaType(meta) if err != nil { meta = fmt.Sprintf("%s;charset=%s", DEFAULT_MIME, DEFAULT_CHARSET) } else if strings.HasPrefix(mediaType, "text/") { if _, ok := params["charset"]; !ok { meta += ";charset=" + DEFAULT_CHARSET } } } header = &Header{ Status: status, Meta: meta, } return } func ParseGeminiDocument(body *bytes.Buffer) (sections []GeminiDocSection) { scanner := bufio.NewScanner(body) reflow := true ignoreSection := true section := GeminiDocSection{ Type: REFLOW_TEXT, } for scanner.Scan() { line := strings.Trim(scanner.Text(), CRLF) line = TermEscapeSGRPattern.ReplaceAllString(line, "") reflowMatch := ReflowModePattern.FindStringSubmatch(line) if len(reflowMatch) != 0 && reflowMatch[0] != "" { reflow = !reflow continue } if !reflow { if !ignoreSection { if section.Type != RAW_TEXT { sections = append(sections, section) section = GeminiDocSection{ Type: RAW_TEXT, } } } else { ignoreSection = false section = GeminiDocSection{ Type: RAW_TEXT, } } section.Text = section.Text + "\n" + line continue } linkMatch := LinkPattern.FindStringSubmatch(line) if len(linkMatch) != 0 && linkMatch[0] != "" { if !ignoreSection { sections = append(sections, section) } label := linkMatch[2] if label == "" { label = linkMatch[1] } ignoreSection = false section = GeminiDocSection{ Type: LINK, Text: label, URL: linkMatch[1], } continue } heading3Match := Heading3Pattern.FindStringSubmatch(line) if len(heading3Match) != 0 && heading3Match[0] != "" { if !ignoreSection { sections = append(sections, section) } ignoreSection = false section = GeminiDocSection{ Type: HEADING_3, Text: heading3Match[1], } continue } heading2Match := Heading2Pattern.FindStringSubmatch(line) if len(heading2Match) != 0 && heading2Match[0] != "" { if !ignoreSection { sections = append(sections, section) } ignoreSection = false section = GeminiDocSection{ Type: HEADING_2, Text: heading2Match[1], } continue } heading1Match := Heading1Pattern.FindStringSubmatch(line) if len(heading1Match) != 0 && heading1Match[0] != "" { if !ignoreSection { sections = append(sections, section) } ignoreSection = false section = GeminiDocSection{ Type: HEADING_1, Text: heading1Match[1], } continue } listItemMatch := ListItemPattern.FindStringSubmatch(line) if len(listItemMatch) != 0 && listItemMatch[0] != "" { if !ignoreSection { if section.Type != LIST { sections = append(sections, section) section = GeminiDocSection{ Type: LIST, } } } else { ignoreSection = false section = GeminiDocSection{ Type: LIST, } } section.Items = append(section.Items, listItemMatch[1]) continue } if !ignoreSection { if section.Type != REFLOW_TEXT { sections = append(sections, section) section = GeminiDocSection{ Type: REFLOW_TEXT, } } } else { ignoreSection = false section = GeminiDocSection{ Type: REFLOW_TEXT, } } section.Text = section.Text + "\n" + line } if !ignoreSection { sections = append(sections, section) } return }