+201
@@ -0,0 +1,201 @@
|
||||
package webseed
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/anacrolix/torrent/common"
|
||||
"github.com/anacrolix/torrent/metainfo"
|
||||
"github.com/anacrolix/torrent/segments"
|
||||
)
|
||||
|
||||
type RequestSpec = segments.Extent
|
||||
|
||||
type requestPartResult struct {
|
||||
resp *http.Response
|
||||
err error
|
||||
}
|
||||
|
||||
type requestPart struct {
|
||||
req *http.Request
|
||||
e segments.Extent
|
||||
result chan requestPartResult
|
||||
start func()
|
||||
// Wrap http response bodies for such things as download rate limiting.
|
||||
responseBodyWrapper ResponseBodyWrapper
|
||||
}
|
||||
|
||||
type Request struct {
|
||||
cancel func()
|
||||
Result chan RequestResult
|
||||
}
|
||||
|
||||
func (r Request) Cancel() {
|
||||
r.cancel()
|
||||
}
|
||||
|
||||
type Client struct {
|
||||
HttpClient *http.Client
|
||||
Url string
|
||||
fileIndex segments.Index
|
||||
info *metainfo.Info
|
||||
// The pieces we can request with the Url. We're more likely to ban/block at the file-level
|
||||
// given that's how requests are mapped to webseeds, but the torrent.Client works at the piece
|
||||
// level. We can map our file-level adjustments to the pieces here. This probably need to be
|
||||
// private in the future, if Client ever starts removing pieces.
|
||||
Pieces roaring.Bitmap
|
||||
ResponseBodyWrapper ResponseBodyWrapper
|
||||
}
|
||||
|
||||
type ResponseBodyWrapper func(io.Reader) io.Reader
|
||||
|
||||
func (me *Client) SetInfo(info *metainfo.Info) {
|
||||
if !strings.HasSuffix(me.Url, "/") && info.IsDir() {
|
||||
// In my experience, this is a non-conforming webseed. For example the
|
||||
// http://ia600500.us.archive.org/1/items URLs in archive.org torrents.
|
||||
return
|
||||
}
|
||||
me.fileIndex = segments.NewIndex(common.LengthIterFromUpvertedFiles(info.UpvertedFiles()))
|
||||
me.info = info
|
||||
me.Pieces.AddRange(0, uint64(info.NumPieces()))
|
||||
}
|
||||
|
||||
type RequestResult struct {
|
||||
Bytes []byte
|
||||
Err error
|
||||
}
|
||||
|
||||
func (ws *Client) NewRequest(r RequestSpec) Request {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
var requestParts []requestPart
|
||||
if !ws.fileIndex.Locate(r, func(i int, e segments.Extent) bool {
|
||||
req, err := NewRequest(ws.Url, i, ws.info, e.Start, e.Length)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
req = req.WithContext(ctx)
|
||||
part := requestPart{
|
||||
req: req,
|
||||
result: make(chan requestPartResult, 1),
|
||||
e: e,
|
||||
responseBodyWrapper: ws.ResponseBodyWrapper,
|
||||
}
|
||||
part.start = func() {
|
||||
go func() {
|
||||
resp, err := ws.HttpClient.Do(req)
|
||||
part.result <- requestPartResult{
|
||||
resp: resp,
|
||||
err: err,
|
||||
}
|
||||
}()
|
||||
}
|
||||
requestParts = append(requestParts, part)
|
||||
return true
|
||||
}) {
|
||||
panic("request out of file bounds")
|
||||
}
|
||||
req := Request{
|
||||
cancel: cancel,
|
||||
Result: make(chan RequestResult, 1),
|
||||
}
|
||||
go func() {
|
||||
b, err := readRequestPartResponses(ctx, requestParts)
|
||||
req.Result <- RequestResult{
|
||||
Bytes: b,
|
||||
Err: err,
|
||||
}
|
||||
}()
|
||||
return req
|
||||
}
|
||||
|
||||
type ErrBadResponse struct {
|
||||
Msg string
|
||||
Response *http.Response
|
||||
}
|
||||
|
||||
func (me ErrBadResponse) Error() string {
|
||||
return me.Msg
|
||||
}
|
||||
|
||||
func recvPartResult(ctx context.Context, buf io.Writer, part requestPart) error {
|
||||
result := <-part.result
|
||||
// Make sure there's no further results coming, it should be a one-shot channel.
|
||||
close(part.result)
|
||||
if result.err != nil {
|
||||
return result.err
|
||||
}
|
||||
defer result.resp.Body.Close()
|
||||
var body io.Reader = result.resp.Body
|
||||
if part.responseBodyWrapper != nil {
|
||||
body = part.responseBodyWrapper(body)
|
||||
}
|
||||
// Prevent further accidental use
|
||||
result.resp.Body = nil
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
switch result.resp.StatusCode {
|
||||
case http.StatusPartialContent:
|
||||
copied, err := io.Copy(buf, body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if copied != part.e.Length {
|
||||
return fmt.Errorf("got %v bytes, expected %v", copied, part.e.Length)
|
||||
}
|
||||
return nil
|
||||
case http.StatusOK:
|
||||
// This number is based on
|
||||
// https://archive.org/download/BloodyPitOfHorror/BloodyPitOfHorror.asr.srt. It seems that
|
||||
// archive.org might be using a webserver implementation that refuses to do partial
|
||||
// responses to small files.
|
||||
if part.e.Start < 48<<10 {
|
||||
if part.e.Start != 0 {
|
||||
log.Printf("resp status ok but requested range [url=%q, range=%q]",
|
||||
part.req.URL,
|
||||
part.req.Header.Get("Range"))
|
||||
}
|
||||
// Instead of discarding, we could try receiving all the chunks present in the response
|
||||
// body. I don't know how one would handle multiple chunk requests resulting in an OK
|
||||
// response for the same file. The request algorithm might be need to be smarter for
|
||||
// that.
|
||||
discarded, _ := io.CopyN(io.Discard, body, part.e.Start)
|
||||
if discarded != 0 {
|
||||
log.Printf("discarded %v bytes in webseed request response part", discarded)
|
||||
}
|
||||
_, err := io.CopyN(buf, body, part.e.Length)
|
||||
return err
|
||||
} else {
|
||||
return ErrBadResponse{"resp status ok but requested range", result.resp}
|
||||
}
|
||||
case http.StatusServiceUnavailable:
|
||||
return ErrTooFast
|
||||
default:
|
||||
return ErrBadResponse{
|
||||
fmt.Sprintf("unhandled response status code (%v)", result.resp.StatusCode),
|
||||
result.resp,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var ErrTooFast = errors.New("making requests too fast")
|
||||
|
||||
func readRequestPartResponses(ctx context.Context, parts []requestPart) (_ []byte, err error) {
|
||||
var buf bytes.Buffer
|
||||
for _, part := range parts {
|
||||
part.start()
|
||||
err = recvPartResult(ctx, &buf, part)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("reading %q at %q: %w", part.req.URL, part.req.Header.Get("Range"), err)
|
||||
break
|
||||
}
|
||||
}
|
||||
return buf.Bytes(), err
|
||||
}
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
package webseed
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/anacrolix/torrent/metainfo"
|
||||
)
|
||||
|
||||
func trailingPath(infoName string, pathComps []string) string {
|
||||
return path.Join(
|
||||
func() (ret []string) {
|
||||
for _, comp := range append([]string{infoName}, pathComps...) {
|
||||
ret = append(ret, url.QueryEscape(comp))
|
||||
}
|
||||
return
|
||||
}()...,
|
||||
)
|
||||
}
|
||||
|
||||
// Creates a request per BEP 19.
|
||||
func NewRequest(url_ string, fileIndex int, info *metainfo.Info, offset, length int64) (*http.Request, error) {
|
||||
fileInfo := info.UpvertedFiles()[fileIndex]
|
||||
if strings.HasSuffix(url_, "/") {
|
||||
// BEP specifies that we append the file path. We need to escape each component of the path
|
||||
// for things like spaces and '#'.
|
||||
url_ += trailingPath(info.Name, fileInfo.Path)
|
||||
}
|
||||
req, err := http.NewRequest(http.MethodGet, url_, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if offset != 0 || length != fileInfo.Length {
|
||||
req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", offset, offset+length-1))
|
||||
}
|
||||
return req, nil
|
||||
}
|
||||
Reference in New Issue
Block a user