e6dl/e621/download.go

package e621

import (
	"fmt"
	"io/ioutil"
	"path"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/dustin/go-humanize"
)

// BeginDownload takes a slice of posts, a directory to save them in, and a
// number of concurrent workers to spawn. It blocks until all the posts have
// been processed. It returns the number of successes, failures, and the total
// number of posts.
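//
// A minimal usage sketch (assumes a []Post named posts obtained elsewhere;
// the directory and worker count are illustrative values):
//
//	dir := "downloads"
//	workers := 4
//	ok, failed, total := BeginDownload(&posts, &dir, &workers)
//	fmt.Printf("%d/%d succeeded, %d failed\n", *ok, *total, *failed)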
func BeginDownload(posts *[]Post, saveDirectory *string, maxConcurrents *int) (*int, *int, *int) {
	var wg sync.WaitGroup
	var completed int
	var successes int
	var failures int

	total := len(*posts)

	// Distribute the posts based on the number of workers
	ppw := len(*posts) / *maxConcurrents // ppw: posts per worker
	mod := len(*posts) % *maxConcurrents // mod: remainder of posts
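
	// For example (illustrative numbers): with 10 posts and 3 workers, ppw is
	// 3 and mod is 1, so the first two workers take posts [0:3) and [3:6) and
	// the last worker takes [6:10), i.e. ppw+mod = 4 posts.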
	for i := 0; i < *maxConcurrents; i++ {
		postsLower := i * ppw
		postsUpper := i*ppw + ppw
		if i == *maxConcurrents-1 {
			// Give the last worker the remaining posts
			// TODO: compensate it for labor
			postsUpper += mod
		}

		wg.Add(1)
		go work(i+1, (*posts)[postsLower:postsUpper], *saveDirectory, &completed, &successes, &failures, &total, &wg)

		// Spawn workers with a little bit of a delay so as to not DDOS e621
		// but also make the initial numbers show up correctly
		time.Sleep(50 * time.Millisecond)
	}
	wg.Wait()

	return &successes, &failures, &total
}

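// work is the body of a single download worker: it downloads each post in its
// slice, printing progress along the way and updating the shared counters.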
func work(wn int, posts []Post, directory string, completed *int, successes *int, failures *int, total *int, wg *sync.WaitGroup) {
	defer wg.Done()
	for _, post := range posts {
		*completed++

		fmt.Printf(
			"[%d/%d] [w%d] Downloading post %d (%s) -> %s...\n",
			*completed,
			*total,
			wn,
			post.ID,
			humanize.Bytes(uint64(post.FileSize)),
			getSavePath(&post, &directory),
		)

		err := downloadPost(&post, directory)
		if err != nil {
			fmt.Printf("[w%d] Failed to download post %d: %v\n", wn, post.ID, err)
			*failures++
		} else {
			*successes++
		}
	}
}

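// getSavePath builds the destination path for a post: the post ID plus the
// extension taken from the last dot-separated segment of its FileURL, joined
// onto the target directory (e.g. a hypothetical post 12345 with a ".jpg"
// FileURL and directory "dl" becomes "dl/12345.jpg").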
func getSavePath(post *Post, directory *string) string {
	pathSliced := strings.Split(post.FileURL, ".")
	extension := pathSliced[len(pathSliced)-1]
	savePath := path.Join(*directory, strconv.Itoa(post.ID)+"."+extension)
	return savePath
}

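// downloadPost fetches a post's file over HTTP and writes it to the path
// chosen by getSavePath.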
func downloadPost(post *Post, directory string) error {
	savePath := getSavePath(post, &directory)

	resp, err := HTTPGet(post.FileURL)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}

	err = ioutil.WriteFile(savePath, body, 0755)
	if err != nil {
		return err
	}

	return nil
}