feat: Waku v2 bridge

Issue #12610
2023-11-12 13:29:38 +01:00
parent 56e7bd01ca
commit 6d31343205
6716 changed files with 1982502 additions and 5891 deletions
@@ -0,0 +1,201 @@
+package webseed
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"strings"
+
+	"github.com/RoaringBitmap/roaring"
+	"github.com/anacrolix/torrent/common"
+	"github.com/anacrolix/torrent/metainfo"
+	"github.com/anacrolix/torrent/segments"
+)
+
+// RequestSpec is the extent handed to Client.NewRequest. It is an alias for segments.Extent, and
+// is located across the client's file index to work out which files must be fetched.
+type RequestSpec = segments.Extent
+
+// requestPartResult is the outcome of performing the HTTP request for one requestPart: the values
+// returned by the HTTP client's Do call.
+type requestPartResult struct {
+	// The response, when the request was performed without error.
+	resp *http.Response
+	// The error returned by the HTTP client, if any.
+	err error
+}
+
+// requestPart is the piece of a Request that falls within a single file, and is therefore fetched
+// with a single HTTP request.
+type requestPart struct {
+	// The HTTP request that fetches this part.
+	req *http.Request
+	// The extent this part covers, as reported by the file index when locating the request.
+	e segments.Extent
+	// Receives the single result of performing req.
+	result chan requestPartResult
+	// Begins performing req in a new goroutine; the outcome is delivered on result.
+	start func()
+	// Wrap http response bodies for such things as download rate limiting.
+	responseBodyWrapper ResponseBodyWrapper
+}
+
+// Request is a handle to an in-flight webseed request, as returned by Client.NewRequest.
+type Request struct {
+	// Cancels the context that the underlying HTTP requests were created with.
+	cancel func()
+	// Receives the RequestResult once all parts have been read, or one of them has failed.
+	Result chan RequestResult
+}
+
+// Cancel cancels the context shared by the HTTP requests that make up this Request.
+func (r Request) Cancel() {
+	stop := r.cancel
+	stop()
+}
+
+// Client fetches torrent data from a single webseed URL.
+type Client struct {
+	// Performs the HTTP requests issued by NewRequest.
+	HttpClient *http.Client
+	// The webseed URL. When it ends in "/", the file's path is appended to it for each request.
+	Url string
+	// Maps requested extents onto the files of the torrent. Populated by SetInfo.
+	fileIndex segments.Index
+	// The torrent info supplied to SetInfo.
+	info *metainfo.Info
+	// The pieces we can request with the Url. We're more likely to ban/block at the file-level
+	// given that's how requests are mapped to webseeds, but the torrent.Client works at the piece
+	// level. We can map our file-level adjustments to the pieces here. This probably needs to be
+	// private in the future, if Client ever starts removing pieces.
+	Pieces roaring.Bitmap
+	// Optional wrapper applied to every HTTP response body before it is read.
+	ResponseBodyWrapper ResponseBodyWrapper
+}
+
+// ResponseBodyWrapper wraps an HTTP response body, returning the reader that the body is actually
+// read through. It can be used for such things as download rate limiting.
+type ResponseBodyWrapper func(io.Reader) io.Reader
+
+// SetInfo gives the client the torrent info, building the file index and marking every piece as
+// requestable. It does nothing when the torrent is a directory but the Url lacks a trailing
+// slash.
+func (me *Client) SetInfo(info *metainfo.Info) {
+	conforming := strings.HasSuffix(me.Url, "/") || !info.IsDir()
+	if !conforming {
+		// In my experience, this is a non-conforming webseed. For example the
+		// http://ia600500.us.archive.org/1/items URLs in archive.org torrents.
+		return
+	}
+	fileLengths := common.LengthIterFromUpvertedFiles(info.UpvertedFiles())
+	me.fileIndex = segments.NewIndex(fileLengths)
+	me.info = info
+	numPieces := uint64(info.NumPieces())
+	me.Pieces.AddRange(0, numPieces)
+}
+
+// RequestResult is the value delivered on Request.Result.
+type RequestResult struct {
+	// The bytes read for the request. When Err is non-nil this holds whatever was read before the
+	// failure.
+	Bytes []byte
+	// The error that stopped the request, or nil if every part was read.
+	Err error
+}
+
+// NewRequest starts fetching the extent r from the webseed and returns a handle to the in-flight
+// request. The extent is split into one HTTP request per file it touches; the parts are fetched
+// in order by a background goroutine, which delivers a single RequestResult on the returned
+// Request's Result channel.
+//
+// It panics if an HTTP request cannot be constructed, or if the file index cannot locate r.
+func (ws *Client) NewRequest(r RequestSpec) Request {
+	ctx, cancel := context.WithCancel(context.Background())
+	var parts []requestPart
+	// Invoked by the file index for each file that r overlaps.
+	addPart := func(fileIndex int, extent segments.Extent) bool {
+		httpReq, err := NewRequest(ws.Url, fileIndex, ws.info, extent.Start, extent.Length)
+		if err != nil {
+			panic(err)
+		}
+		httpReq = httpReq.WithContext(ctx)
+		// Buffered so the sending goroutine never blocks, even if nobody receives.
+		results := make(chan requestPartResult, 1)
+		start := func() {
+			go func() {
+				resp, err := ws.HttpClient.Do(httpReq)
+				results <- requestPartResult{resp: resp, err: err}
+			}()
+		}
+		parts = append(parts, requestPart{
+			req:                 httpReq,
+			e:                   extent,
+			result:              results,
+			start:               start,
+			responseBodyWrapper: ws.ResponseBodyWrapper,
+		})
+		return true
+	}
+	if !ws.fileIndex.Locate(r, addPart) {
+		panic("request out of file bounds")
+	}
+	ret := Request{
+		cancel: cancel,
+		Result: make(chan RequestResult, 1),
+	}
+	go func() {
+		data, err := readRequestPartResponses(ctx, parts)
+		ret.Result <- RequestResult{Bytes: data, Err: err}
+	}()
+	return ret
+}
+
+// ErrBadResponse is the error returned when a webseed answers with a response that cannot be
+// used, such as an unhandled status code.
+type ErrBadResponse struct {
+	// Describes what was wrong with the response. This is the error's message.
+	Msg string
+	// The offending response. Its Body has already been detached and must not be read.
+	Response *http.Response
+}
+
+// Error implements the error interface by returning Msg.
+func (me ErrBadResponse) Error() string {
+	msg := me.Msg
+	return msg
+}
+
+// recvPartResult waits for the result of part's HTTP request and writes the bytes of the part's
+// extent to buf.
+//
+// It returns the HTTP client's error if the request failed, ctx.Err() if ctx is done by the time
+// the response arrives, ErrTooFast for a 503 response, and ErrBadResponse for any other response
+// that cannot be used. The response body is always closed before returning. On error, buf may
+// have received a partial write.
+func recvPartResult(ctx context.Context, buf io.Writer, part requestPart) error {
+	result := <-part.result
+	// Make sure there's no further results coming, it should be a one-shot channel.
+	close(part.result)
+	if result.err != nil {
+		return result.err
+	}
+	defer result.resp.Body.Close()
+	var body io.Reader = result.resp.Body
+	if part.responseBodyWrapper != nil {
+		body = part.responseBodyWrapper(body)
+	}
+	// Prevent further accidental use
+	result.resp.Body = nil
+	if ctx.Err() != nil {
+		return ctx.Err()
+	}
+	switch result.resp.StatusCode {
+	case http.StatusPartialContent:
+		// Read at most one byte more than expected: enough to detect an overlong response without
+		// buffering an unbounded body from a misbehaving server.
+		copied, err := io.Copy(buf, io.LimitReader(body, part.e.Length+1))
+		if err != nil {
+			return err
+		}
+		if copied != part.e.Length {
+			return fmt.Errorf("got %v bytes, expected %v", copied, part.e.Length)
+		}
+		return nil
+	case http.StatusOK:
+		// This number is based on
+		// https://archive.org/download/BloodyPitOfHorror/BloodyPitOfHorror.asr.srt. It seems that
+		// archive.org might be using a webserver implementation that refuses to do partial
+		// responses to small files.
+		if part.e.Start >= 48<<10 {
+			return ErrBadResponse{"resp status ok but requested range", result.resp}
+		}
+		if part.e.Start != 0 {
+			log.Printf("resp status ok but requested range [url=%q, range=%q]",
+				part.req.URL,
+				part.req.Header.Get("Range"))
+		}
+		// Instead of discarding, we could try receiving all the chunks present in the response
+		// body. I don't know how one would handle multiple chunk requests resulting in an OK
+		// response for the same file. The request algorithm might need to be smarter for that.
+		discarded, err := io.CopyN(io.Discard, body, part.e.Start)
+		if discarded != 0 {
+			log.Printf("discarded %v bytes in webseed request response part", discarded)
+		}
+		if err != nil {
+			// The body ended, or failed, before reaching the start of the requested range.
+			return fmt.Errorf("discarding %v bytes before requested range: %w", part.e.Start, err)
+		}
+		_, err = io.CopyN(buf, body, part.e.Length)
+		return err
+	case http.StatusServiceUnavailable:
+		return ErrTooFast
+	default:
+		return ErrBadResponse{
+			fmt.Sprintf("unhandled response status code (%v)", result.resp.StatusCode),
+			result.resp,
+		}
+	}
+}
+
+// ErrTooFast is returned when the webseed responds with 503 Service Unavailable.
+var ErrTooFast = errors.New("making requests too fast")
+
+// readRequestPartResponses starts each part in turn, waits for its response, and concatenates
+// the bytes of all parts in order. It stops at the first part that fails, returning the bytes
+// gathered so far together with an error naming that part's URL and Range header.
+func readRequestPartResponses(ctx context.Context, parts []requestPart) (_ []byte, err error) {
+	var data bytes.Buffer
+	for i := range parts {
+		p := parts[i]
+		// Parts are fetched strictly one after another: a part is only started once the previous
+		// one has been read completely.
+		p.start()
+		if partErr := recvPartResult(ctx, &data, p); partErr != nil {
+			rangeHeader := p.req.Header.Get("Range")
+			return data.Bytes(), fmt.Errorf("reading %q at %q: %w", p.req.URL, rangeHeader, partErr)
+		}
+	}
+	return data.Bytes(), nil
+}
@@ -0,0 +1,40 @@
+package webseed
+
+import (
+	"fmt"
+	"net/http"
+	"net/url"
+	"path"
+	"strings"
+
+	"github.com/anacrolix/torrent/metainfo"
+)
+
+// trailingPath returns the slash-separated URL path of a file within a torrent: infoName followed
+// by pathComps, with each component escaped as a single URL path segment.
+//
+// Components are escaped with url.PathEscape rather than url.QueryEscape because the result is
+// appended to the path of the webseed URL. Query escaping encodes a space as "+", which in a
+// path is a literal plus sign rather than a space. A "+" in a component is additionally encoded
+// as "%2B", so the name stays unambiguous even for a server that decodes "+" in paths as a space.
+//
+// The components are combined with path.Join, which drops empty components and cleans the
+// result, so an empty infoName does not produce a leading slash.
+func trailingPath(infoName string, pathComps []string) string {
+	escape := func(comp string) string {
+		return strings.ReplaceAll(url.PathEscape(comp), "+", "%2B")
+	}
+	escaped := make([]string, 0, 1+len(pathComps))
+	escaped = append(escaped, escape(infoName))
+	for _, comp := range pathComps {
+		escaped = append(escaped, escape(comp))
+	}
+	return path.Join(escaped...)
+}
+
+// NewRequest creates a GET request per BEP 19 for length bytes starting at offset within the
+// file at fileIndex of info. When url_ ends in "/", the file's escaped path is appended to it;
+// otherwise url_ is used as is. A Range header is set unless the whole file is requested.
+func NewRequest(url_ string, fileIndex int, info *metainfo.Info, offset, length int64) (*http.Request, error) {
+	file := info.UpvertedFiles()[fileIndex]
+	target := url_
+	if strings.HasSuffix(target, "/") {
+		// BEP specifies that we append the file path. We need to escape each component of the path
+		// for things like spaces and '#'.
+		target += trailingPath(info.Name, file.Path)
+	}
+	req, err := http.NewRequest(http.MethodGet, target, nil)
+	if err != nil {
+		return nil, err
+	}
+	wholeFile := offset == 0 && length == file.Length
+	if !wholeFile {
+		// HTTP byte ranges are inclusive at both ends.
+		lastByte := offset + length - 1
+		req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", offset, lastByte))
+	}
+	return req, nil
+}