commit 9eb67e685b5d04401b6dbbcf9b273ab298fd7cdd
parent 9064c333249a0beea0b8a3d5c15a518927b36d85
Author: Vincent Demeester <vincent@sbr.pm>
Date: Mon, 17 Jun 2024 16:31:33 +0200
go-org-readwise: almost "full" implementation.
Signed-off-by: Vincent Demeester <vincent@sbr.pm>
Diffstat:
6 files changed, 268 insertions(+), 119 deletions(-)
diff --git a/tools/go-org-readwise/internal/org/org.go b/tools/go-org-readwise/internal/org/org.go
@@ -1,119 +1,38 @@
package org
-import (
- "context"
- "errors"
- "fmt"
- "os"
- "regexp"
- "strings"
-
- "github.com/vdemeester/home/tools/go-org-readwise/internal/readwise"
-)
-
-const (
- // denote-id-format "%Y%m%dT%H%M%S"
- denoteDateFormat = "20060102T150405"
- // punctionation that is removed from file names.
- denoteExcludedPunctuationRegexpStr = "[][{}!@#$%^&*()=+'\"?,.|;:~`‘’“”/]*"
-)
-
-var (
- denoteExcludedPunctuationRegexp = regexp.MustCompile(denoteExcludedPunctuationRegexpStr)
- replaceHypensRegexp = regexp.MustCompile("[-]+")
-)
-
-/*
-For each results:
-- Define a filename (denote naming — gonna be weird but meh) — from title + first highlight date
-- Detect if the file exists
-- If the file doesn't exist, create the file
-- If the file exist, append
-
-For the file format: org file with denote naming
-And use the update date to add new highlights
-*/
-
-func Sync(ctx context.Context, target string, results []readwise.Result) error {
- for _, result := range results {
- // FIXME: handle the case where tags where added after
- // a sync. In that case, we want to try different
- // titles (without tags, …) ; most likely we want to
- // use a regexp to "detect" part of the thing.
- filename := denoteFilename(result)
- fmt.Println("file", filename)
- if _, err := os.Stat(filename); err == nil {
- // Append to the file
- return errors.New("Not implemented")
- } else if errors.Is(err, os.ErrNotExist) {
- // Create the file
- } else {
- // Schrodinger: file may or may not exist. See err for details.
- // Therefore, do *NOT* use !os.IsNotExist(err) to test for file existence
- return err
- }
- }
- return nil
+// Document is a "full" org-mode document, used for a new "readwise
+// document" containing highlights. The "full" notion here being, what
+// I need to sync from readwise to org, not the full representation of
+// an org file.
+//
+// It is the value convertDocument executes mainTemplate against.
+type Document struct {
+ // Title is rendered as the "#+title:" keyword.
+ Title string
+ // Author is rendered as the "#+author:" keyword.
+ Author string
+ // Email is not referenced by mainTemplate; createNewOrgDocument
+ // leaves it empty.
+ Email string
+ // Date is rendered as the "#+date:" keyword. createNewOrgDocument
+ // fills it with the first highlight date in orgDateFormat.
+ Date string
+ // FileTags are rendered through orgtags as the "#+filetags:"
+ // keyword, only when non-empty.
+ FileTags []string
+ // Identifier is rendered as the "#+identifier:" keyword.
+ // createNewOrgDocument fills it with the first highlight date in
+ // denoteDateFormat.
+ Identifier string
+ // Category is rendered as the "#+category:" keyword.
+ Category string
+ // URL is rendered as "#+property: URL:", only when non-empty.
+ URL string
+ // ReadwiseURL is rendered as "#+property: READWISE_URL:".
+ ReadwiseURL string
+ // Summary is emitted as free text before the "* Highlights" heading.
+ Summary string
+ // Highlights are rendered as second-level headings under
+ // "* Highlights".
+ Highlights []Highlight
}
-// See https://protesilaos.com/emacs/denote#h:4e9c7512-84dc-4dfb-9fa9-e15d51178e5d
-// DATE==SIGNATURE--TITLE__KEYWORDS.EXTENSION
-// Examples:
-// - 20240611T100401--tuesday-11-june-2024__journal.org
-// - 20240511T100401==readwise--foo__bar_baz.org
-func denoteFilename(result readwise.Result) string {
- var date, signature, title, keywords string
- // The DATE field represents the date in year-month-day format
- // followed by the capital letter T (for “time”) and the
- // current time in hour-minute-second notation. The
- // presentation is compact: 20220531T091625. The DATE serves
- // as the unique identifier of each note and, as such, is also
- // known as the file’s ID or identifier.
- date = result.FirstHighlightDate().Format(denoteDateFormat)
-
- // File names can include a string of alphanumeric characters
- // in the SIGNATURE field. Signatures have no clearly defined
- // purpose and are up to the user to define. One use-case is
- // to use them to establish sequential relations between files
- // (e.g. 1, 1a, 1b, 1b1, 1b2, …).
- // We use signature to mark files synced from readwise.
- signature = "==readwise"
-
- // The TITLE field is the title of the note, as provided by
- // the user. It automatically gets downcased by default and is
- // also hyphenated (Sluggification of file name
- // components). An entry about “Economics in the Euro Area”
- // produces an economics-in-the-euro-area string for the TITLE
- // of the file name.
- title = sluggify(result.Title)
-
- // The KEYWORDS field consists of one or more entries
- // demarcated by an underscore (the separator is inserted
- // automatically). Each keyword is a string provided by the
- // user at the relevant prompt which broadly describes the
- // contents of the entry.
- if len(result.BookTags) > 0 {
- tags := make([]string, len(result.BookTags))
- for i, t := range result.BookTags {
- tags[i] = sluggify(t.Name)
- }
- keywords = "__" + strings.Join(tags, "_")
- }
-
- return fmt.Sprintf("%s%s--%s%s.org", date, signature, title, keywords)
+// PartialDocument is a subset of org-mode used for an update of a
+// "readwise document", thus containing new highlights.
+type PartialDocument struct {
+ // Date is the date attached to the batch of new highlights.
+ // NOTE(review): presumably the value shown in the "New Highlights
+ // on ..." heading of additionnalTemplate — confirm once the append
+ // path is implemented.
+ Date string
+ // Highlights are the highlights to add to the existing document.
+ Highlights []Highlight
}
-func sluggify(s string) string {
- // Remove punctuation
- s = denoteExcludedPunctuationRegexp.ReplaceAllString(s, "")
- // Replace spaces with hypens
- s = strings.ReplaceAll(s, " ", "-")
- // Replace underscore with hypens
- s = strings.ReplaceAll(s, "_", "-")
- // Replace multiple hypens with a single one
- s = replaceHypensRegexp.ReplaceAllString(s, "-")
- // Remove any leading and trailing hypen
- s = strings.TrimPrefix(s, "-")
- s = strings.TrimSuffix(s, "-")
- return s
+// Highlight represents a readwise highlight in org-mode.
+type Highlight struct {
+ // ID is used as the description of the link in the highlight
+ // heading. transformHighlights fills it with the decimal readwise
+ // highlight ID.
+ ID string
+ // URL is the target of the link in the highlight heading.
+ // transformHighlights fills it with the highlight's readwise URL.
+ URL string
+ // Location is not referenced by mainTemplate and is not set by
+ // transformHighlights.
+ Location string
+ // LocationType is not referenced by mainTemplate and is not set by
+ // transformHighlights.
+ LocationType string
+ // Date is shown between brackets at the start of the highlight
+ // heading. transformHighlights formats it with orgDateFormat.
+ Date string
+ // Note is rendered under a "*** Note" heading, only when non-empty.
+ Note string
+ // Text is the body of the highlight heading.
+ Text string
+ // Tags are rendered through orgtags at the end of the highlight
+ // heading, only when non-empty.
+ Tags []string
}
diff --git a/tools/go-org-readwise/internal/org/sync.go b/tools/go-org-readwise/internal/org/sync.go
@@ -0,0 +1,176 @@
+package org
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "os"
+ "path/filepath"
+ "regexp"
+ "strings"
+
+ "github.com/vdemeester/home/tools/go-org-readwise/internal/readwise"
+)
+
+const (
+ // denoteDateFormat is the Go time layout equivalent to denote's
+ // denote-id-format "%Y%m%dT%H%M%S".
+ denoteDateFormat = "20060102T150405"
+ // orgDateFormat is the Go time layout for org dates, such as
+ // 2024-06-17 Mon 12:05.
+ orgDateFormat = "2006-01-02 Mon 15:04"
+ // denoteExcludedPunctuationRegexpStr is the pattern matching the
+ // punctuation that is removed from file names.
+ denoteExcludedPunctuationRegexpStr = "[][{}!@#$%^&*()=+'\"?,.|;:~`‘’“”/]*"
+)
+
+var (
+ // denoteExcludedPunctuationRegexp is the compiled form of
+ // denoteExcludedPunctuationRegexpStr; sluggify uses it to strip
+ // punctuation.
+ denoteExcludedPunctuationRegexp = regexp.MustCompile(denoteExcludedPunctuationRegexpStr)
+ // replaceHypensRegexp matches runs of one or more hyphens; sluggify
+ // uses it to collapse each run into a single hyphen.
+ replaceHypensRegexp = regexp.MustCompile("[-]+")
+)
+
+/*
+For each result:
+- Define a filename (denote naming — gonna be weird but meh) — from title + first highlight date
+- Detect if the file exists
+- If the file doesn't exist, create the file
+- If the file exists, append
+
+For the file format: org file with denote naming
+And use the update date to add new highlights
+*/
+
+// Sync writes one org file per readwise result into the target folder.
+//
+// The file name of each result is computed by denoteFilename and joined
+// to target. When nothing exists at that path, a new document is built
+// with createNewOrgDocument, rendered with convertDocument and written
+// with mode 0644. Updating an already existing file is not implemented
+// yet and is reported as an error.
+//
+// Sync stops at the first failure and returns the error wrapped with the
+// path of the file being processed. It returns ctx.Err() when ctx is done
+// before a result is processed.
+func Sync(ctx context.Context, target string, results []readwise.Result) error {
+	for _, result := range results {
+		// Honor cancellation between files; the file operations
+		// below are not interruptible themselves.
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		// FIXME: handle the case where tags were added after
+		// a sync. In that case, we want to try different
+		// titles (without tags, …) ; most likely we want to
+		// use a regexp to "detect" part of the thing.
+		filename := filepath.Join(target, denoteFilename(result))
+		fmt.Println("file", filename)
+
+		_, err := os.Stat(filename)
+		switch {
+		case err == nil:
+			// The file exists: new highlights should be appended.
+			return fmt.Errorf("updating %q: not implemented", filename)
+		case errors.Is(err, os.ErrNotExist):
+			// The file does not exist: create it from scratch.
+			content, err := convertDocument(createNewOrgDocument(result))
+			if err != nil {
+				return fmt.Errorf("rendering %q: %w", filename, err)
+			}
+			if err := os.WriteFile(filename, content, 0o644); err != nil {
+				return fmt.Errorf("writing %q: %w", filename, err)
+			}
+		default:
+			// Schrodinger: file may or may not exist. See err for details.
+			// Therefore, do *NOT* use !os.IsNotExist(err) to test for file existence
+			return fmt.Errorf("checking %q: %w", filename, err)
+		}
+	}
+	return nil
+}
+
+// createNewOrgDocument converts a readwise result into the Document used
+// to render a brand new org file.
+//
+// Book tags are sluggified and become the file tags; FileTags stays nil
+// when the result has no book tag. Date and Identifier are both derived
+// from the first highlight date, formatted with orgDateFormat and
+// denoteDateFormat respectively. Email is left empty.
+func createNewOrgDocument(r readwise.Result) Document {
+	var filetags []string
+	if len(r.BookTags) > 0 {
+		// Exactly one entry per book tag: a longer slice would keep
+		// an empty trailing tag, which orgtags renders as "::".
+		filetags = make([]string, 0, len(r.BookTags))
+		for _, t := range r.BookTags {
+			filetags = append(filetags, sluggify(t.Name))
+		}
+	}
+	firstHighlight := r.FirstHighlightDate()
+	return Document{
+		Title:       r.Title,
+		Author:      r.Author,
+		ReadwiseURL: r.ReadwiseURL,
+		URL:         r.SourceURL,
+		Email:       "", // Figure out how to get the email
+		Date:        firstHighlight.Format(orgDateFormat),
+		Identifier:  firstHighlight.Format(denoteDateFormat),
+		FileTags:    filetags,
+		Category:    r.Category,
+		Summary:     r.Summary,
+		Highlights:  transformHighlights(r.Highlights),
+	}
+}
+
+// transformHighlights converts readwise highlights into their org
+// counterparts, preserving order.
+//
+// The numeric readwise ID is rendered in decimal, the date is formatted
+// with orgDateFormat and tag names are sluggified. Tags stays nil for a
+// highlight without tags. Location and LocationType are left empty.
+func transformHighlights(highlights []readwise.Highlight) []Highlight {
+	orgHighlights := make([]Highlight, len(highlights))
+	for i, h := range highlights {
+		var tags []string
+		if len(h.Tags) > 0 {
+			tags = make([]string, len(h.Tags))
+			for j, t := range h.Tags {
+				tags[j] = sluggify(t.Name)
+			}
+		}
+		orgHighlights[i] = Highlight{
+			ID:   fmt.Sprintf("%d", h.ID),
+			URL:  h.ReadwiseURL,
+			Date: h.HighlightedAt.Format(orgDateFormat),
+			Note: h.Note,
+			Text: h.Text,
+			// Carry the tags over so the template can print them
+			// on the highlight heading.
+			Tags: tags,
+		}
+	}
+	return orgHighlights
+}
+
+// See https://protesilaos.com/emacs/denote#h:4e9c7512-84dc-4dfb-9fa9-e15d51178e5d
+// DATE==SIGNATURE--TITLE__KEYWORDS.EXTENSION
+// Examples:
+// - 20240611T100401--tuesday-11-june-2024__journal.org
+// - 20240511T100401==readwise--foo__bar_baz.org
+func denoteFilename(result readwise.Result) string {
+ var date, signature, title, keywords string
+ // The DATE field represents the date in year-month-day format
+ // followed by the capital letter T (for “time”) and the
+ // current time in hour-minute-second notation. The
+ // presentation is compact: 20220531T091625. The DATE serves
+ // as the unique identifier of each note and, as such, is also
+ // known as the file’s ID or identifier.
+ date = result.FirstHighlightDate().Format(denoteDateFormat)
+
+ // File names can include a string of alphanumeric characters
+ // in the SIGNATURE field. Signatures have no clearly defined
+ // purpose and are up to the user to define. One use-case is
+ // to use them to establish sequential relations between files
+ // (e.g. 1, 1a, 1b, 1b1, 1b2, …).
+ // We use signature to mark files synced from readwise.
+ signature = "==" + result.Category
+
+ // The TITLE field is the title of the note, as provided by
+ // the user. It automatically gets downcased by default and is
+ // also hyphenated (Sluggification of file name
+ // components). An entry about “Economics in the Euro Area”
+ // produces an economics-in-the-euro-area string for the TITLE
+ // of the file name.
+ title = sluggify(result.Title)
+
+ // The KEYWORDS field consists of one or more entries
+ // demarcated by an underscore (the separator is inserted
+ // automatically). Each keyword is a string provided by the
+ // user at the relevant prompt which broadly describes the
+ // contents of the entry.
+ if len(result.BookTags) > 0 {
+ tags := make([]string, len(result.BookTags))
+ for i, t := range result.BookTags {
+ tags[i] = sluggify(t.Name)
+ }
+ keywords = "__" + strings.Join(tags, "_")
+ }
+
+ return strings.ToLower(fmt.Sprintf("%s%s--%s%s.org", date, signature, title, keywords))
+}
+
+// sluggify turns s into a file-name component: the punctuation matched
+// by denoteExcludedPunctuationRegexp is dropped, spaces and underscores
+// become hyphens, runs of hyphens are collapsed into one and a leading
+// or trailing hyphen is removed. The case of s is left untouched.
+func sluggify(s string) string {
+	slug := denoteExcludedPunctuationRegexp.ReplaceAllString(s, "")
+	// Spaces and underscores both map to a hyphen.
+	slug = strings.NewReplacer(" ", "-", "_", "-").Replace(slug)
+	slug = replaceHypensRegexp.ReplaceAllString(slug, "-")
+	// Hyphen runs are collapsed at this point, so at most one hyphen
+	// remains on each side.
+	return strings.Trim(slug, "-")
+}
diff --git a/tools/go-org-readwise/internal/org/org_test.go b/tools/go-org-readwise/internal/org/sync_test.go
diff --git a/tools/go-org-readwise/internal/org/write.go b/tools/go-org-readwise/internal/org/write.go
@@ -0,0 +1,60 @@
+package org
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+ "text/template"
+)
+
+var (
+ // additionnalTemplate is the text/template source for the heading
+ // introducing a batch of new highlights; it reads .Date and ranges
+ // over .Highlights. The range body is empty, so highlights are not
+ // rendered by it yet, and no code visible in this file executes it.
+ additionnalTemplate = `* New Highlights on {{ .Date }}
+{{ range .Highlights }}
+{{ end }}`
+
+ // mainTemplate is the text/template source of a new org file. It is
+ // executed by convertDocument against a Document and needs the
+ // "orgtags" template function.
+ // NOTE(review): org's "#+property:" keyword is usually written
+ // "NAME VALUE" without a colon after the name — confirm the ':'
+ // after READWISE_URL and URL is intended.
+ mainTemplate = `#+title: {{ .Title }}
+#+author: {{ .Author }}
+#+date: {{ .Date }}
+#+identifier: {{ .Identifier }}
+#+category: {{ .Category }}
+{{ if .FileTags }}#+filetags: {{ orgtags .FileTags }}{{ end }}
+#+property: READWISE_URL: {{ .ReadwiseURL }}
+{{ if .URL }}#+property: URL: {{ .URL }}{{ end }}
+
+{{ .Summary }}
+
+* Highlights
+{{ range $h := .Highlights -}}
+** [{{ $h.Date }}] Highlight [[{{ $h.URL }}][{{ $h.ID }}]]{{ if $h.Tags }} {{ orgtags $h.Tags }}{{ end }}
+{{ $h.Text }}
+{{ if $h.Note }}*** Note
+{{ $h.Note }}
+{{ end -}}
+{{ end -}}
+`
+)
+
+// orgtags formats tags as an org-mode tag string: every tag is enclosed
+// by colons, e.g. ":foo:bar:". It returns the empty string when there
+// is no tag.
+func orgtags(tags []string) string {
+	if len(tags) > 0 {
+		joined := strings.Join(tags, ":")
+		return fmt.Sprintf(":%s:", joined)
+	}
+	return ""
+}
+
+// convertDocument renders d with mainTemplate and returns the content of
+// the resulting org file.
+//
+// The template is parsed on every call with the "orgtags" function
+// registered. A parse or execution failure is returned as is, together
+// with an empty byte slice.
+func convertDocument(d Document) ([]byte, error) {
+	helpers := template.FuncMap{"orgtags": orgtags}
+
+	t, parseErr := template.New("org").Funcs(helpers).Parse(mainTemplate)
+	if parseErr != nil {
+		return []byte{}, parseErr
+	}
+
+	var out bytes.Buffer
+	if execErr := t.Execute(&out, d); execErr != nil {
+		return []byte{}, execErr
+	}
+	return out.Bytes(), nil
+}
diff --git a/tools/go-org-readwise/internal/readwise/types.go b/tools/go-org-readwise/internal/readwise/types.go
@@ -42,8 +42,7 @@ type Highlight struct {
Text string `json:"text"`
ID int `json:"id"`
Note string `json:"note"`
- Location int `json:"location"`
- LocationType string `json:"location_type"`
+ ReadwiseURL string `json:"readwise_url"`
HighlightedAt time.Time `json:"highlighted_at"`
BookID int `json:"book_id"`
URL string `json:"url"`
diff --git a/tools/go-org-readwise/main.go b/tools/go-org-readwise/main.go
@@ -14,11 +14,6 @@ import (
)
func main() {
- // for _, n := range d.Nodes {
- // fmt.Printf("%+v\n", n)
- // }
-
- os.Exit(1)
apiKeyFile := flag.String("apiKeyFile", "", "File to load the apiKey from. If empty, it will defer to the READWISE_KEY environment variable")
targetFolder := flag.String("targetFolder", "", "Folder to write highlights (in org file) into")
flag.Parse()