home

My NixOS system configurations.
Log | Files | Refs | LICENSE

commit 9eb67e685b5d04401b6dbbcf9b273ab298fd7cdd
parent 9064c333249a0beea0b8a3d5c15a518927b36d85
Author: Vincent Demeester <vincent@sbr.pm>
Date:   Mon, 17 Jun 2024 16:31:33 +0200

go-org-readwise: almost "full" implementation.

Signed-off-by: Vincent Demeester <vincent@sbr.pm>

Diffstat:
M tools/go-org-readwise/internal/org/org.go | 143 +++++++++++++++++--------------------------------------------------------------
A tools/go-org-readwise/internal/org/sync.go | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R tools/go-org-readwise/internal/org/org_test.go -> tools/go-org-readwise/internal/org/sync_test.go | 0
A tools/go-org-readwise/internal/org/write.go | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M tools/go-org-readwise/internal/readwise/types.go | 3 +--
M tools/go-org-readwise/main.go | 5 -----
6 files changed, 268 insertions(+), 119 deletions(-)

diff --git a/tools/go-org-readwise/internal/org/org.go b/tools/go-org-readwise/internal/org/org.go @@ -1,119 +1,38 @@ package org -import ( - "context" - "errors" - "fmt" - "os" - "regexp" - "strings" - - "github.com/vdemeester/home/tools/go-org-readwise/internal/readwise" -) - -const ( - // denote-id-format "%Y%m%dT%H%M%S" - denoteDateFormat = "20060102T150405" - // punctionation that is removed from file names. - denoteExcludedPunctuationRegexpStr = "[][{}!@#$%^&*()=+'\"?,.|;:~`‘’“”/]*" -) - -var ( - denoteExcludedPunctuationRegexp = regexp.MustCompile(denoteExcludedPunctuationRegexpStr) - replaceHypensRegexp = regexp.MustCompile("[-]+") -) - -/* -For each results: -- Define a filename (denote naming — gonna be weird but meh) — from title + first highlight date -- Detect if the file exists -- If the file doesn't exist, create the file -- If the file exist, append - -For the file format: org file with denote naming -And use the update date to add new highlights -*/ - -func Sync(ctx context.Context, target string, results []readwise.Result) error { - for _, result := range results { - // FIXME: handle the case where tags where added after - // a sync. In that case, we want to try different - // titles (without tags, …) ; most likely we want to - // use a regexp to "detect" part of the thing. - filename := denoteFilename(result) - fmt.Println("file", filename) - if _, err := os.Stat(filename); err == nil { - // Append to the file - return errors.New("Not implemented") - } else if errors.Is(err, os.ErrNotExist) { - // Create the file - } else { - // Schrodinger: file may or may not exist. See err for details. - // Therefore, do *NOT* use !os.IsNotExist(err) to test for file existence - return err - } - } - return nil +// Document is a "full" org-mode document, used for a new "readwise +// document" containing highlights. The "full" notion here being, what +// I need to sync from readwise to org, not the full representation of +// a org file. 
+type Document struct { + Title string + Author string + Email string + Date string + FileTags []string + Identifier string + Category string + URL string + ReadwiseURL string + Summary string + Highlights []Highlight } -// See https://protesilaos.com/emacs/denote#h:4e9c7512-84dc-4dfb-9fa9-e15d51178e5d -// DATE==SIGNATURE--TITLE__KEYWORDS.EXTENSION -// Examples: -// - 20240611T100401--tuesday-11-june-2024__journal.org -// - 20240511T100401==readwise--foo__bar_baz.org -func denoteFilename(result readwise.Result) string { - var date, signature, title, keywords string - // The DATE field represents the date in year-month-day format - // followed by the capital letter T (for “time”) and the - // current time in hour-minute-second notation. The - // presentation is compact: 20220531T091625. The DATE serves - // as the unique identifier of each note and, as such, is also - // known as the file’s ID or identifier. - date = result.FirstHighlightDate().Format(denoteDateFormat) - - // File names can include a string of alphanumeric characters - // in the SIGNATURE field. Signatures have no clearly defined - // purpose and are up to the user to define. One use-case is - // to use them to establish sequential relations between files - // (e.g. 1, 1a, 1b, 1b1, 1b2, …). - // We use signature to mark files synced from readwise. - signature = "==readwise" - - // The TITLE field is the title of the note, as provided by - // the user. It automatically gets downcased by default and is - // also hyphenated (Sluggification of file name - // components). An entry about “Economics in the Euro Area” - // produces an economics-in-the-euro-area string for the TITLE - // of the file name. - title = sluggify(result.Title) - - // The KEYWORDS field consists of one or more entries - // demarcated by an underscore (the separator is inserted - // automatically). Each keyword is a string provided by the - // user at the relevant prompt which broadly describes the - // contents of the entry. 
- if len(result.BookTags) > 0 { - tags := make([]string, len(result.BookTags)) - for i, t := range result.BookTags { - tags[i] = sluggify(t.Name) - } - keywords = "__" + strings.Join(tags, "_") - } - - return fmt.Sprintf("%s%s--%s%s.org", date, signature, title, keywords) +// PartialDocument is a subset of org-mode used for an update of a +// "readwise document", thus containing new highlights. +type PartialDocument struct { + Date string + Highlights []Highlight } -func sluggify(s string) string { - // Remove punctuation - s = denoteExcludedPunctuationRegexp.ReplaceAllString(s, "") - // Replace spaces with hypens - s = strings.ReplaceAll(s, " ", "-") - // Replace underscore with hypens - s = strings.ReplaceAll(s, "_", "-") - // Replace multiple hypens with a single one - s = replaceHypensRegexp.ReplaceAllString(s, "-") - // Remove any leading and trailing hypen - s = strings.TrimPrefix(s, "-") - s = strings.TrimSuffix(s, "-") - return s +// Highlight represent a readwise highlight in org-mode. +type Highlight struct { + ID string + URL string + Location string + LocationType string + Date string + Note string + Text string + Tags []string } diff --git a/tools/go-org-readwise/internal/org/sync.go b/tools/go-org-readwise/internal/org/sync.go @@ -0,0 +1,176 @@ +package org + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/vdemeester/home/tools/go-org-readwise/internal/readwise" +) + +const ( + // denote-id-format "%Y%m%dT%H%M%S" + denoteDateFormat = "20060102T150405" + // org-date-format 2024-06-17 Mon 12:05 + orgDateFormat = "2006-01-02 Mon 15:04" + // punctionation that is removed from file names. 
+ denoteExcludedPunctuationRegexpStr = "[][{}!@#$%^&*()=+'\"?,.|;:~`‘’“”/]*" +) + +var ( + denoteExcludedPunctuationRegexp = regexp.MustCompile(denoteExcludedPunctuationRegexpStr) + replaceHypensRegexp = regexp.MustCompile("[-]+") +) + +/* +For each results: +- Define a filename (denote naming — gonna be weird but meh) — from title + first highlight date +- Detect if the file exists +- If the file doesn't exist, create the file +- If the file exist, append + +For the file format: org file with denote naming +And use the update date to add new highlights +*/ + +func Sync(ctx context.Context, target string, results []readwise.Result) error { + for _, result := range results { + // FIXME: handle the case where tags where added after + // a sync. In that case, we want to try different + // titles (without tags, …) ; most likely we want to + // use a regexp to "detect" part of the thing. + denotefilename := denoteFilename(result) + filename := filepath.Join(target, denotefilename) + fmt.Println("file", filename) + if _, err := os.Stat(filename); err == nil { + // Append to the file + return errors.New("Not implemented") + } else if errors.Is(err, os.ErrNotExist) { + // Create the file + d := createNewOrgDocument(result) + content, err := convertDocument(d) + if err != nil { + return err + } + if err := os.WriteFile(filename, content, 0o644); err != nil { + return err + } + } else { + // Schrodinger: file may or may not exist. See err for details. 
+ // Therefore, do *NOT* use !os.IsNotExist(err) to test for file existence + return err + } + } + return nil +} + +func createNewOrgDocument(r readwise.Result) Document { + var filetags []string + if len(r.BookTags) > 0 { + filetags = make([]string, len(r.BookTags)+1) + for i, t := range r.BookTags { + filetags[i] = sluggify(t.Name) + } + } + d := Document{ + Title: r.Title, + Author: r.Author, + ReadwiseURL: r.ReadwiseURL, + URL: r.SourceURL, + Email: "", // Figure out how to get the email + Date: r.FirstHighlightDate().Format(orgDateFormat), + Identifier: r.FirstHighlightDate().Format(denoteDateFormat), + FileTags: filetags, + Category: r.Category, + Summary: r.Summary, + Highlights: transformHighlights(r.Highlights), + } + return d +} + +func transformHighlights(highlights []readwise.Highlight) []Highlight { + orgHighlights := make([]Highlight, len(highlights)) + for i, h := range highlights { + var tags []string + if len(h.Tags) > 0 { + tags = make([]string, len(h.Tags)) + for i, t := range h.Tags { + tags[i] = sluggify(t.Name) + } + } + orgHighlights[i] = Highlight{ + ID: fmt.Sprintf("%d", h.ID), + URL: h.ReadwiseURL, + Date: h.HighlightedAt.Format(orgDateFormat), + Note: h.Note, + Text: h.Text, + } + } + return orgHighlights +} + +// See https://protesilaos.com/emacs/denote#h:4e9c7512-84dc-4dfb-9fa9-e15d51178e5d +// DATE==SIGNATURE--TITLE__KEYWORDS.EXTENSION +// Examples: +// - 20240611T100401--tuesday-11-june-2024__journal.org +// - 20240511T100401==readwise--foo__bar_baz.org +func denoteFilename(result readwise.Result) string { + var date, signature, title, keywords string + // The DATE field represents the date in year-month-day format + // followed by the capital letter T (for “time”) and the + // current time in hour-minute-second notation. The + // presentation is compact: 20220531T091625. The DATE serves + // as the unique identifier of each note and, as such, is also + // known as the file’s ID or identifier. 
+ date = result.FirstHighlightDate().Format(denoteDateFormat) + + // File names can include a string of alphanumeric characters + // in the SIGNATURE field. Signatures have no clearly defined + // purpose and are up to the user to define. One use-case is + // to use them to establish sequential relations between files + // (e.g. 1, 1a, 1b, 1b1, 1b2, …). + // We use signature to mark files synced from readwise. + signature = "==" + result.Category + + // The TITLE field is the title of the note, as provided by + // the user. It automatically gets downcased by default and is + // also hyphenated (Sluggification of file name + // components). An entry about “Economics in the Euro Area” + // produces an economics-in-the-euro-area string for the TITLE + // of the file name. + title = sluggify(result.Title) + + // The KEYWORDS field consists of one or more entries + // demarcated by an underscore (the separator is inserted + // automatically). Each keyword is a string provided by the + // user at the relevant prompt which broadly describes the + // contents of the entry. 
+ if len(result.BookTags) > 0 { + tags := make([]string, len(result.BookTags)) + for i, t := range result.BookTags { + tags[i] = sluggify(t.Name) + } + keywords = "__" + strings.Join(tags, "_") + } + + return strings.ToLower(fmt.Sprintf("%s%s--%s%s.org", date, signature, title, keywords)) +} + +func sluggify(s string) string { + // Remove punctuation + s = denoteExcludedPunctuationRegexp.ReplaceAllString(s, "") + // Replace spaces with hypens + s = strings.ReplaceAll(s, " ", "-") + // Replace underscore with hypens + s = strings.ReplaceAll(s, "_", "-") + // Replace multiple hypens with a single one + s = replaceHypensRegexp.ReplaceAllString(s, "-") + // Remove any leading and trailing hypen + s = strings.TrimPrefix(s, "-") + s = strings.TrimSuffix(s, "-") + return s +} diff --git a/tools/go-org-readwise/internal/org/org_test.go b/tools/go-org-readwise/internal/org/sync_test.go diff --git a/tools/go-org-readwise/internal/org/write.go b/tools/go-org-readwise/internal/org/write.go @@ -0,0 +1,60 @@ +package org + +import ( + "bytes" + "fmt" + "strings" + "text/template" +) + +var ( + additionnalTemplate = `* New Highlights on {{ .Date }} +{{ range .Highlights }} +{{ end }}` + + mainTemplate = `#+title: {{ .Title }} +#+author: {{ .Author }} +#+date: {{ .Date }} +#+identifier: {{ .Identifier }} +#+category: {{ .Category }} +{{ if .FileTags }}#+filetags: {{ orgtags .FileTags }}{{ end }} +#+property: READWISE_URL: {{ .ReadwiseURL }} +{{ if .URL }}#+property: URL: {{ .URL }}{{ end }} + +{{ .Summary }} + +* Highlights +{{ range $h := .Highlights -}} +** [{{ $h.Date }}] Highlight [[{{ $h.URL }}][{{ $h.ID }}]]{{ if $h.Tags }} {{ orgtags $h.Tags }}{{ end }} +{{ $h.Text }} +{{ if $h.Note }}*** Note +{{ $h.Note }} +{{ end -}} +{{ end -}} +` +) + +func orgtags(tags []string) string { + if len(tags) == 0 { + return "" + } + return fmt.Sprintf(":%s:", strings.Join(tags, ":")) +} + +func convertDocument(d Document) ([]byte, error) { + var err error + + funcMap := template.FuncMap{ + 
"orgtags": orgtags, + } + + tmpl, err := template.New("org").Funcs(funcMap).Parse(mainTemplate) + if err != nil { + return []byte{}, err + } + var buff bytes.Buffer + if err := tmpl.Execute(&buff, d); err != nil { + return []byte{}, err + } + return buff.Bytes(), nil +} diff --git a/tools/go-org-readwise/internal/readwise/types.go b/tools/go-org-readwise/internal/readwise/types.go @@ -42,8 +42,7 @@ type Highlight struct { Text string `json:"text"` ID int `json:"id"` Note string `json:"note"` - Location int `json:"location"` - LocationType string `json:"location_type"` + ReadwiseURL string `json:"readwise_url"` HighlightedAt time.Time `json:"highlighted_at"` BookID int `json:"book_id"` URL string `json:"url"` diff --git a/tools/go-org-readwise/main.go b/tools/go-org-readwise/main.go @@ -14,11 +14,6 @@ import ( ) func main() { - // for _, n := range d.Nodes { - // fmt.Printf("%+v\n", n) - // } - - os.Exit(1) apiKeyFile := flag.String("apiKeyFile", "", "File to load the apiKey from. If empty, it will defer to the READWISE_KEY environment variable") targetFolder := flag.String("targetFolder", "", "Folder to write highlights (in org file) into") flag.Parse()