I use the Go program below to get notifications in my RSS reader when websites that don’t offer RSS feeds themselves change. For each website, you add a new command in main(): choose a shortname, enter the URL, and enter an HTML node selector for the part you are interested in (this also excludes surrounding content that might be generated dynamically on each visit). You then have your RSS reader call this program with “go run webwatcher SHORTNAME”.
package main
import (
"bytes"
"encoding/json"
"fmt"
. "git.fireandbrimst.one/aw/goutil/html"
"git.fireandbrimst.one/aw/goutil/misc"
xnetHtml "golang.org/x/net/html"
"io/ioutil"
"os"
"path"
"text/template"
"time"
)
const (
// DL_LIMIT is the limit handed to misc.DownloadAll for every page fetch.
// NOTE(review): presumably a maximum download size in bytes (15 MiB) —
// confirm against the goutil/misc package.
DL_LIMIT = 15 * 1024 * 1024
// CACHE_FOLDER is the directory (relative to the working directory) that
// holds one JSON cache file per command; main creates it on startup.
CACHE_FOLDER = "cache"
)
// RSS_TEMPLATE is the text/template source for the RSS 2.0 document that the
// program prints. It is executed with an HP value and produces a channel
// containing exactly one item: the item's content is HP.LastContent, and its
// guid and pubDate are derived from HP.LastModified, so both stay the same
// until LastModified is updated.
const RSS_TEMPLATE string = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<title><![CDATA[ {{.Shortname}} ]]></title>
<link><![CDATA[ {{.URL}} ]]></link>
<description><![CDATA[ {{.Shortname}} ]]></description>
<item>
<title><![CDATA[ {{.URL}} ]]></title>
<content:encoded><![CDATA[ {{.LastContent}} ]]></content:encoded>
<guid><![CDATA[ {{.URL}}/{{.LastModified.Format "20060102-150405"}} ]]></guid>
<link><![CDATA[ {{.URL}} ]]></link>
<pubDate>{{.LastModified.Format "Mon, 02 Jan 2006 15:04:05 -0700"}}</pubDate>
</item>
</channel>
</rss>
`
// optPanic panics with err when err is non-nil and does nothing otherwise.
// It is the program's single error-handling strategy: any failure aborts
// the run.
func optPanic(err error) {
	if err == nil {
		return
	}
	panic(err)
}
// command describes one watched website.
type command struct {
// shortname identifies the command on the command line and is also used
// as the name of its cache file inside CACHE_FOLDER.
shortname string
// URL is the page that gets downloaded and parsed.
URL string
// selector is the predicate passed to HtmlNode.Find to pick the node whose
// rendered HTML is compared between runs.
selector func(n *HtmlNode) bool
}
// filename returns the path of this command's cache file, i.e. the command's
// shortname placed inside CACHE_FOLDER.
func (c *command) filename() string {
	name := c.shortname
	return path.Join(CACHE_FOLDER, name)
}
// getContent downloads c.URL (bounded by DL_LIMIT), parses the response as
// HTML, looks up the node chosen by c.selector and returns that node rendered
// back to an HTML string.
//
// Every failure panics: download and parse errors go through optPanic, as
// does a rendering error (previously the error from Render was dropped, which
// could silently yield truncated content).
func (c *command) getContent() string {
	raw, err := misc.DownloadAll(c.URL, DL_LIMIT)
	optPanic(err)
	parsed, err := xnetHtml.Parse(bytes.NewReader(raw))
	optPanic(err)

	match := (*HtmlNode)(parsed).Find(c.selector)
	// NOTE(review): Find presumably returns nil when no node satisfies the
	// selector — confirm against goutil/html. Rendering a nil node would
	// crash with an opaque nil dereference, so fail with a clear message.
	if match == nil {
		panic(fmt.Errorf("%s: selector matched no node in %s", c.shortname, c.URL))
	}

	var buf bytes.Buffer
	optPanic(xnetHtml.Render(&buf, (*xnetHtml.Node)(match)))
	return buf.String()
}
// unmarshalHPObject loads the cached HP state from the JSON file at filename.
// It is best-effort: if the file cannot be read or does not contain valid
// JSON for an HP, a zero-valued HP is returned instead of an error.
func unmarshalHPObject(filename string) HP {
	raw, readErr := ioutil.ReadFile(filename)
	if readErr != nil {
		// Unreadable file: same outcome as decoding empty input.
		return HP{}
	}
	var cached HP
	if json.Unmarshal(raw, &cached) != nil {
		// Discard anything a failed decode may have partially filled in.
		return HP{}
	}
	return cached
}
// marshalHPObject serializes hp as indented JSON and writes it to filename
// with mode 0644. Encoding and write failures both panic via optPanic.
func marshalHPObject(filename string, hp HP) {
	encoded, err := json.MarshalIndent(hp, "", " ")
	optPanic(err)
	optPanic(ioutil.WriteFile(filename, encoded, 0644))
}
// genRSS fetches the current content for the command, compares it with the
// cached state, prints the resulting RSS document to stdout and finally
// writes the state back to the cache file.
//
// LastContent and LastModified are only touched when the fetched content
// differs from the cached one; Shortname and URL are always refreshed from
// the command definition.
func (c *command) genRSS() {
	fresh := c.getContent()
	cachePath := c.filename()

	state := unmarshalHPObject(cachePath)
	state.Shortname, state.URL = c.shortname, c.URL
	if state.LastContent != fresh {
		state.LastContent = fresh
		state.LastModified = time.Now()
	}

	// Emit the feed first, then persist the state.
	optPanic(rssTemplate.Execute(os.Stdout, state))
	marshalHPObject(cachePath, state)
}
// HP is the per-command state. It is stored as JSON in the command's cache
// file and is also the data the RSS template is executed with.
type HP struct {
// Shortname is copied from the command on every run.
Shortname string
// URL is copied from the command on every run.
URL string
// LastContent is the rendered HTML of the selected node as of the most
// recent detected change.
LastContent string
// LastModified is the time at which LastContent was last replaced.
LastModified time.Time
}
// rssTemplate is the parsed form of RSS_TEMPLATE; main assigns it before any
// command runs, and genRSS executes it.
var rssTemplate *template.Template
// main parses the RSS template, makes sure the cache folder exists and runs
// the command whose shortname equals the first command-line argument.
//
// Exit status is 0 after a command has run, and -1 when the argument is
// missing or does not name a known command.
func main() {
	// Without this guard, indexing os.Args[1] below panics with an
	// index-out-of-range error when the program is started without arguments.
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: webwatcher SHORTNAME")
		os.Exit(-1)
	}
	shortname := os.Args[1]

	rssTemplate = template.Must(template.New("rss").Parse(RSS_TEMPLATE))

	// The error is ignored on purpose: the folder normally exists already,
	// and a genuine failure surfaces when the cache file is written.
	_ = os.Mkdir(CACHE_FOLDER, 0755)

	commands := []command{
		{"stilldrinking", "https://www.stilldrinking.org/", IsTag("div").And(HasID("cont"))},
	}
	// Index into the slice so the pointer-receiver method is called on the
	// slice element and the loop variable does not shadow the command type.
	for i := range commands {
		if commands[i].shortname == shortname {
			commands[i].genRSS()
			os.Exit(0)
		}
	}
	fmt.Fprintln(os.Stderr, "unknown command", shortname)
	os.Exit(-1)
}
Posted in
programming
2020-11-27 14:25 UTC