diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/libgemini/libgemini.go | 165 |
1 files changed, 163 insertions, 2 deletions
diff --git a/pkg/libgemini/libgemini.go b/pkg/libgemini/libgemini.go index 303490c..71012ef 100644 --- a/pkg/libgemini/libgemini.go +++ b/pkg/libgemini/libgemini.go | |||
@@ -2,9 +2,11 @@ package libgemini | |||
2 | 2 | ||
3 | import ( | 3 | import ( |
4 | "bufio" | 4 | "bufio" |
5 | "bytes" | ||
5 | "crypto/tls" | 6 | "crypto/tls" |
6 | "errors" | 7 | "errors" |
7 | "fmt" | 8 | "fmt" |
9 | "html/template" | ||
8 | "io" | 10 | "io" |
9 | "mime" | 11 | "mime" |
10 | "net" | 12 | "net" |
@@ -49,8 +51,14 @@ const ( | |||
49 | ) | 51 | ) |
50 | 52 | ||
var (
	// HeaderPattern matches a response header line: a two-digit status
	// code (group 1), one or more spaces/tabs, and the rest of the line
	// (group 2).
	HeaderPattern = regexp.MustCompile(`^(\d\d)[ \t]+(.*)$`)

	// LinkPattern matches a link line: "=>", optional spaces/tabs, the
	// target (group 1) and an optional label (group 2, empty when absent).
	LinkPattern = regexp.MustCompile(`^=>[ \t]*([^ \t]+)(?:[ \t]+(.*))?$`)

	// ReflowModePattern matches a line starting with three backticks;
	// group 1 is whatever follows them on that line.
	ReflowModePattern = regexp.MustCompile("^```(.*)$")

	// Heading1Pattern matches any line starting with "#". It therefore
	// also matches level 2 and level 3 heading lines, so callers must try
	// Heading3Pattern and Heading2Pattern first. Group 1 is the text after
	// the marker, including any leading whitespace.
	Heading1Pattern = regexp.MustCompile(`^#(.*)$`)

	// Heading2Pattern matches any line starting with "##" (including level
	// 3 heading lines). Group 1 is the text after the marker.
	Heading2Pattern = regexp.MustCompile(`^##(.*)$`)

	// Heading3Pattern matches any line starting with "###". Group 1 is the
	// text after the marker.
	Heading3Pattern = regexp.MustCompile(`^###(.*)$`)

	// ListItemPattern matches a list item line. The marker is an asterisk
	// followed by a space, so lines such as "*bold*" are not treated as
	// list items. Group 1 is the item text without leading whitespace.
	ListItemPattern = regexp.MustCompile(`^\* [ \t]*(.*)$`)

	// TermEscapeSGRPattern matches a terminal SGR escape sequence: ESC,
	// "[", zero or more ";"-separated numeric parameters, and a final "m".
	// The ESC byte is required so that ordinary text such as "[1m" is left
	// alone and no stray ESC byte remains after replacement.
	TermEscapeSGRPattern = regexp.MustCompile(`\x1b\[\d*(?:;\d*)*m`)
)
55 | 63 | ||
56 | type Header struct { | 64 | type Header struct { |
@@ -63,6 +71,25 @@ type Response struct { | |||
63 | Body io.Reader | 71 | Body io.Reader |
64 | } | 72 | } |
65 | 73 | ||
// GeminiDocSectionType identifies the kind of content held by a
// GeminiDocSection.
type GeminiDocSectionType byte

// SectionType is an alias for GeminiDocSectionType. The constants below and
// the section struct refer to the type by this shorter name, so both
// spellings must resolve to the same type.
type SectionType = GeminiDocSectionType

const (
	// RAW_TEXT marks text that is kept line by line.
	// NOTE(review): presumably used for preformatted blocks - confirm.
	RAW_TEXT = SectionType(0)
	// REFLOW_TEXT marks ordinary text.
	REFLOW_TEXT = SectionType(1)
	// LINK marks a link; the section carries a label and a URL.
	LINK = SectionType(2)
	// HEADING_1 marks a top-level heading.
	HEADING_1 = SectionType(3)
	// HEADING_2 marks a second-level heading.
	HEADING_2 = SectionType(4)
	// HEADING_3 marks a third-level heading.
	HEADING_3 = SectionType(5)
	// LIST marks a run of list items stored in Items.
	LIST = SectionType(6)
)

// GeminiDocSection is one parsed piece of a Gemini document.
type GeminiDocSection struct {
	// Type tells which of the remaining fields are meaningful.
	Type SectionType
	// Text is the section's text: the label of a link, the title of a
	// heading, or the accumulated lines of a text section.
	Text string
	// URL is the link target; it is only set for LINK sections.
	URL template.URL
	// Items holds the entries of a LIST section.
	Items []string
}

// Section is an alias for GeminiDocSection; ParseGeminiDocument refers to
// the struct by this shorter name.
type Section = GeminiDocSection
92 | |||
66 | func Get(uri string) (*Response, error) { | 93 | func Get(uri string) (*Response, error) { |
67 | u, err := url.Parse(uri) | 94 | u, err := url.Parse(uri) |
68 | if err != nil { | 95 | if err != nil { |
@@ -143,3 +170,137 @@ func ParseHeader(line string) (header *Header, err error) { | |||
143 | 170 | ||
144 | return | 171 | return |
145 | } | 172 | } |
173 | |||
174 | func ParseGeminiDocument(body *bytes.Buffer) (sections []Section) { | ||
175 | scanner := bufio.NewScanner(body) | ||
176 | |||
177 | reflow := true | ||
178 | ignoreSection := true | ||
179 | section := Section{ | ||
180 | Type: REFLOW_TEXT | ||
181 | } | ||
182 | |||
183 | for scanner.Scan() { | ||
184 | line := strings.Trim(scanner.Text(), CRLF) | ||
185 | line = TermEscapeSGRPattern.ReplaceAllString(line, "") | ||
186 | |||
187 | reflowMatch := ReflowModePattern.FindStringSubmatch(line) | ||
188 | if len(heading3Match) != 0 { | ||
189 | reflow = !reflow | ||
190 | continue | ||
191 | } | ||
192 | |||
193 | if !reflow { | ||
194 | if !ignoreSection { | ||
195 | if section.Type != REFLOW_TEXT { | ||
196 | sections = append(sections, section) | ||
197 | section = Section{ | ||
198 | Type: REFLOW_TEXT | ||
199 | } | ||
200 | } | ||
201 | } else { | ||
202 | ignoreSection = false | ||
203 | section = Section{ | ||
204 | Type: REFLOW_TEXT | ||
205 | } | ||
206 | } | ||
207 | |||
208 | section.Text = section.Text + "\n" + line | ||
209 | |||
210 | continue | ||
211 | } | ||
212 | |||
213 | linkMatch := LinkPattern.FindStringSubmatch(line) | ||
214 | if len(linkMatch) != 0 && linkMatch[0] != "" { | ||
215 | if !ignoreSection { | ||
216 | sections = append(sections, section) | ||
217 | } | ||
218 | |||
219 | label := linkMatch[2] | ||
220 | if label == "" { | ||
221 | label = linkMatch[1] | ||
222 | } | ||
223 | |||
224 | ignoreSection = false | ||
225 | section = Section{ | ||
226 | Type: LINK, | ||
227 | Text: label, | ||
228 | URL: template.URL(resolveURI(linkMatch[1], baseURL)), | ||
229 | } | ||
230 | |||
231 | continue | ||
232 | } | ||
233 | |||
234 | heading3Match := Heading3Pattern.FindStringSubmatch(line) | ||
235 | if len(heading3Match) != 0 { | ||
236 | if !ignoreSection { | ||
237 | sections = append(sections, section) | ||
238 | } | ||
239 | |||
240 | ignoreSection = false | ||
241 | section = Section{ | ||
242 | Type: HEADING_3, | ||
243 | Text: heading3Match[1] | ||
244 | } | ||
245 | |||
246 | continue | ||
247 | } | ||
248 | |||
249 | heading2Match := Heading2Pattern.FindStringSubmatch(line) | ||
250 | if len(heading2Match) != 0 { | ||
251 | if !ignoreSection { | ||
252 | sections = append(sections, section) | ||
253 | } | ||
254 | |||
255 | ignoreSection = false | ||
256 | section = Section{ | ||
257 | Type: HEADING_2, | ||
258 | Text: heading2Match[1] | ||
259 | } | ||
260 | |||
261 | continue | ||
262 | } | ||
263 | |||
264 | heading1Match := Heading1Pattern.FindStringSubmatch(line) | ||
265 | if len(heading1Match) != 0 { | ||
266 | if !ignoreSection { | ||
267 | sections = append(sections, section) | ||
268 | } | ||
269 | |||
270 | ignoreSection = false | ||
271 | section = Section{ | ||
272 | Type: HEADING_1, | ||
273 | Text: heading1Match[1] | ||
274 | } | ||
275 | |||
276 | continue | ||
277 | } | ||
278 | |||
279 | listItemMatch := ListItemPattern.FindStringSubmatch(line) | ||
280 | if len(listItemMatch) != 0 { | ||
281 | if !ignoreSection { | ||
282 | if section.Type != LIST { | ||
283 | sections = append(sections, section) | ||
284 | section = Section{ | ||
285 | Type: LIST | ||
286 | } | ||
287 | } | ||
288 | } else { | ||
289 | ignoreSection = false | ||
290 | section = Section{ | ||
291 | Type: LIST, | ||
292 | } | ||
293 | } | ||
294 | |||
295 | section.Items = append(section.Items, listItemMatch[1]) | ||
296 | |||
297 | continue | ||
298 | } | ||
299 | } | ||
300 | |||
301 | if !ignoreSection { | ||
302 | sections = append(sections, section) | ||
303 | } | ||
304 | |||
305 | return | ||
306 | } | ||