Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New RSS support, and improvements to http-get. #54

Merged
merged 2 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,18 @@ For examples please consult the output of `sysbox help expect`, but a simple exa



## feeds

The feeds sub-command fetches the single remote URL given as its argument, and prints a list of all the RSS/Atom feeds referenced within that page.

Basic usage would be:

$ sysbox feeds https://blog.steve.fi/

If no protocol is specified, "https" is assumed (for example, an argument of "example.com" will be converted to https://example.com).



## find

The find sub-command allows finding files/directories that match a given number
Expand Down Expand Up @@ -269,6 +281,12 @@ A simple HTTP-server. Allows serving to localhost, or to the local LAN.

Very much "curl-lite", allows you to fetch the contents of a remote URL. SSL errors, etc, are handled, but only minimal options are supported.

Basic usage would be:

$ sysbox http-get https://example.com/

If no protocol is specified, "https" is assumed (for example, an argument of "example.com" will be converted to https://example.com).



## ips
Expand Down
134 changes: 134 additions & 0 deletions cmd_feeds.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package main

import (
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"

"github.com/skx/subcommands"
"golang.org/x/net/html"
)

// feedsCommand holds the options and state for the "feeds" subcommand.
type feedsCommand struct {

// We embed the NoFlags option, because we accept no command-line flags.
subcommands.NoFlags
}

// ErrNoFeeds is the sentinel error returned by FindFeeds when the remote
// page was read to the end without any RSS/Atom feed link being found.
var ErrNoFeeds = errors.New("NO-FEED")

// Info reports the name of this subcommand together with its help text.
func (t *feedsCommand) Info() (string, string) {
	const name = "feeds"

	// Kept as a raw string so the layout shown to the user is exactly
	// what is written here.
	const help = `Extract RSS feeds from remote URLS.

Details:

This command fetches the contents of the specified URL, much like
the 'http-get' command would, and extracts any specified RSS feed
from the contents of that remote URL.

Examples:

$ sysbox feeds https://blog.steve.fi/`

	return name, help
}

// FindFeeds fetches the page at base and returns the absolute URL of every
// RSS/Atom feed advertised by a <link> element within that page.
//
// If base does not start with "http://" or "https://" then "https://" is
// prepended before the request is made.
//
// A <link> is treated as a feed when its "type" attribute is
// "application/rss+xml" or "application/atom+xml" (compared without regard
// to case) and it carries a non-empty "href". Relative href values are
// resolved as URL references against the URL of the page which was
// actually served, so "/index.rss" and "../feed.xml" behave as they would
// in a browser.
//
// On success the returned slice is non-empty. ErrNoFeeds is returned when
// the page was read to the end without any feed being found; any other
// error comes from the HTTP request or from reading/tokenizing the body.
func (t *feedsCommand) FindFeeds(base string) ([]string, error) {

	// Default to https when no scheme was given. The complete scheme
	// prefix is tested so that a bare host such as "httpbin.org", which
	// merely begins with "http", still receives one.
	if !strings.HasPrefix(base, "http://") && !strings.HasPrefix(base, "https://") {
		base = "https://" + base
	}

	// Make the request
	response, err := http.Get(base)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	// Relative links are resolved against the URL that was finally
	// served, which differs from base if redirects were followed. Fall
	// back to base itself should the response not record its request.
	pageURL, err := url.Parse(base)
	if err != nil {
		return nil, err
	}
	if response.Request != nil && response.Request.URL != nil {
		pageURL = response.Request.URL
	}

	var feeds []string
	z := html.NewTokenizer(response.Body)

	for {
		switch z.Next() {
		case html.ErrorToken:
			// io.EOF marks the normal end of the document; anything
			// else is a genuine failure to read or tokenize the body.
			if err := z.Err(); !errors.Is(err, io.EOF) {
				return nil, fmt.Errorf("parsing HTML: %w", err)
			}
			if len(feeds) == 0 {
				return nil, ErrNoFeeds
			}
			return feeds, nil

		case html.StartTagToken, html.SelfClosingTagToken:
			href := feedLinkHref(z.Token())
			if href == "" {
				continue
			}

			ref, err := url.Parse(href)
			if err != nil {
				// Discovery is best-effort: one malformed link
				// should not hide the other feeds on the page.
				continue
			}
			feeds = append(feeds, pageURL.ResolveReference(ref).String())
		}
	}
}

// feedLinkHref returns the href of tok when it is a <link> element whose
// type attribute names an RSS or Atom feed, and "" otherwise.
func feedLinkHref(tok html.Token) string {
	if tok.Data != "link" {
		return ""
	}

	isFeed := false
	href := ""

	for _, attr := range tok.Attr {
		switch attr.Key {
		case "type":
			// MIME types are case-insensitive.
			mime := strings.TrimSpace(attr.Val)
			if strings.EqualFold(mime, "application/rss+xml") ||
				strings.EqualFold(mime, "application/atom+xml") {
				isFeed = true
			}
		case "href":
			href = strings.TrimSpace(attr.Val)
		}
	}

	if !isFeed {
		return ""
	}
	return href
}

// Execute is invoked if the user specifies `feeds` as the subcommand.
//
// Exactly one argument, the URL to inspect, is expected. Each feed found
// is printed on its own line. The return value is the process exit code:
// 0 on success (including the case where the page has no feeds), and 1 on
// a usage error or if the page could not be processed.
func (t *feedsCommand) Execute(args []string) int {

	// Ensure we have only a single URL
	if len(args) != 1 {
		fmt.Printf("Usage: feeds URL\n")
		return 1
	}

	// The URL. Not named "url", to avoid shadowing the net/url package.
	target := args[0]

	// We'll default to https if the protocol isn't specified. The complete
	// scheme prefix is tested so that a bare host which merely begins with
	// "http" (e.g. "httpbin.org") still receives one.
	if !strings.HasPrefix(target, "http://") && !strings.HasPrefix(target, "https://") {
		target = "https://" + target
	}

	feeds, err := t.FindFeeds(target)
	switch {
	case errors.Is(err, ErrNoFeeds):
		// Not a failure: the page was processed, it simply has no feeds.
		fmt.Printf("No Feeds found in %s\n", target)
		return 0
	case err != nil:
		fmt.Printf("Error processing %s: %s\n", target, err)
		return 1
	}

	for _, feed := range feeds {
		fmt.Printf("%s\n", feed)
	}

	return 0
}
13 changes: 11 additions & 2 deletions cmd_http_get.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"io"
"net/http"
"sort"
"strings"
)

// Structure for our options and state.
Expand Down Expand Up @@ -51,10 +52,18 @@ func (hg *httpGetCommand) Execute(args []string) int {
return 1
}

// The URL
url := args[0]

// We'll default to https if the protocol isn't specified.
if !strings.HasPrefix(url, "http") {
url = "https://" + url
}

// Make the request
response, err := http.Get(args[0])
response, err := http.Get(url)
if err != nil {
fmt.Printf("error: %s", err.Error())
fmt.Printf("error fetching %s: %s", url, err.Error())
return 1
}

Expand Down
1 change: 1 addition & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ func main() {
subcommands.Register(&envTemplateCommand{})
subcommands.Register(&execSTDINCommand{})
subcommands.Register(&expectCommand{})
subcommands.Register(&feedsCommand{})
subcommands.Register(&findCommand{})
subcommands.Register(&fingerdCommand{})
subcommands.Register(&html2TextCommand{})
Expand Down
Loading