e6dl/concurrent/download.go

143 lines
3.1 KiB
Go
Raw Normal View History

2019-03-14 03:20:04 +08:00
package concurrent
2019-03-01 03:47:50 +08:00
import (
	"fmt"
	"io/ioutil"
	"path"
	"strconv"
	"sync"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/logrusorgru/aurora"

	"github.com/BitHeaven-Official/e6dl/e621"
)
2019-03-30 01:36:15 +08:00
// workState stores the state of all the jobs and
// is shared across workers
type workState struct {
2019-03-30 02:42:11 +08:00
Total int
Completed int
Successes int
Failures int
SaveDirectory string
2019-03-30 01:36:15 +08:00
}
2019-03-01 08:06:58 +08:00
// BeginDownload takes a slice of posts, a directory to save them in, and a
// number of concurrent workers to make. It blocks until all the post have
// been processed. It returns the number of successes, failures, and the total
// amount of posts.
2019-03-14 03:20:04 +08:00
func BeginDownload(posts *[]e621.Post, saveDirectory *string, maxConcurrents *int) (*int, *int, *int) {
2019-03-30 02:42:11 +08:00
// Channel for main goroutine to give workers a post when they are done downloading one
wc := make(chan *e621.Post)
2019-03-30 00:17:46 +08:00
2019-03-30 00:33:56 +08:00
var current int
2019-03-01 03:47:50 +08:00
total := len(*posts)
2019-03-30 01:36:15 +08:00
state := workState{
2019-03-30 02:42:11 +08:00
Total: total,
SaveDirectory: *saveDirectory,
2019-03-30 01:36:15 +08:00
}
2019-03-30 00:17:46 +08:00
// If we have more workers than posts, then we don't need all of them
if *maxConcurrents > total {
*maxConcurrents = total
}
2019-03-01 03:47:50 +08:00
for i := 0; i < *maxConcurrents; i++ {
2019-03-30 00:17:46 +08:00
// Create our workers
2019-03-30 02:42:11 +08:00
go work(i+1, &state, wc)
2019-03-01 03:47:50 +08:00
2019-03-30 00:17:46 +08:00
// Give them their initial posts
2019-03-30 02:42:11 +08:00
wc <- &(*posts)[current]
2019-03-30 00:17:46 +08:00
current++
2019-03-30 00:33:56 +08:00
time.Sleep(time.Millisecond * 50)
2019-03-30 00:17:46 +08:00
}
for {
2019-03-30 02:42:11 +08:00
// Wait for a worker to be done (they send nil to wc)
<-wc
2019-03-30 00:17:46 +08:00
// If we finished downloading all posts, break out of the loop
2019-03-30 01:36:15 +08:00
if state.Successes+state.Failures == total {
2019-03-30 00:17:46 +08:00
break
2019-03-01 03:47:50 +08:00
}
2019-03-30 00:17:46 +08:00
// If there's no more posts to give, stop the worker
2019-03-30 00:33:56 +08:00
if current >= total {
2019-03-30 02:42:11 +08:00
wc <- nil
2019-03-30 00:17:46 +08:00
continue
}
2019-03-01 03:47:50 +08:00
2019-03-30 00:17:46 +08:00
// Give the worker the next post in the array
2019-03-30 02:42:11 +08:00
wc <- &(*posts)[current]
2019-03-30 00:33:56 +08:00
current++
2019-03-30 00:17:46 +08:00
}
2019-03-01 08:06:58 +08:00
2019-03-30 01:36:15 +08:00
return &state.Successes, &state.Failures, &total
2019-03-01 03:47:50 +08:00
}
2019-03-30 02:42:11 +08:00
func work(wn int, state *workState, wc chan *e621.Post) {
2019-03-30 00:17:46 +08:00
for {
2019-03-30 01:36:15 +08:00
state.Completed++
2019-03-01 08:06:58 +08:00
2019-03-30 00:33:56 +08:00
// Wait for a post from main
2019-03-30 02:42:11 +08:00
post := <-wc
2019-03-30 00:33:56 +08:00
if post == nil { // nil means there aren't any more posts, so we're OK to break
2019-03-30 00:17:46 +08:00
return
}
2019-03-30 01:36:15 +08:00
progress := aurora.Sprintf(aurora.Green("[%d/%d]"), state.Completed, state.Total)
2019-03-14 04:21:50 +08:00
workerText := aurora.Sprintf(aurora.Cyan("[w%d]"), wn)
fmt.Println(aurora.Sprintf(
"%s %s Downloading post %d (%s) -> %s...",
progress,
workerText,
2019-03-01 03:57:42 +08:00
post.ID,
humanize.Bytes(uint64(post.File.Size)),
2019-03-30 02:42:11 +08:00
getSavePath(post, &state.SaveDirectory),
2019-03-14 04:21:50 +08:00
))
2019-03-01 08:06:58 +08:00
2019-03-30 02:42:11 +08:00
err := downloadPost(post, state.SaveDirectory)
2019-03-01 08:06:58 +08:00
if err != nil {
2019-03-01 08:14:09 +08:00
fmt.Printf("[w%d] Failed to download post %d: %v\n", wn, post.ID, err)
2019-03-30 01:36:15 +08:00
state.Failures++
2019-03-01 08:06:58 +08:00
} else {
2019-03-30 01:36:15 +08:00
state.Successes++
2019-03-01 08:06:58 +08:00
}
2019-03-30 00:17:46 +08:00
2019-03-30 02:44:47 +08:00
// Signal to main goroutine that we are done with this download
2019-03-30 02:42:11 +08:00
wc <- nil
2019-03-01 03:47:50 +08:00
}
}
2019-03-14 03:20:04 +08:00
func getSavePath(post *e621.Post, directory *string) string {
savePath := path.Join(*directory, strconv.Itoa(post.ID)+"."+post.File.Ext)
2019-03-01 03:57:42 +08:00
return savePath
}
2019-03-14 03:20:04 +08:00
func downloadPost(post *e621.Post, directory string) error {
2019-03-01 03:57:42 +08:00
savePath := getSavePath(post, &directory)
resp, err := e621.HTTPGet(post.File.URL)
2019-03-01 03:47:50 +08:00
if err != nil {
2019-03-01 08:06:58 +08:00
return err
2019-03-01 03:47:50 +08:00
}
2019-03-01 08:06:58 +08:00
2019-03-01 03:47:50 +08:00
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
2019-03-01 08:06:58 +08:00
return err
2019-03-01 03:47:50 +08:00
}
err = ioutil.WriteFile(savePath, body, 0755)
if err != nil {
2019-03-01 08:06:58 +08:00
return err
2019-03-01 03:47:50 +08:00
}
2019-03-01 08:06:58 +08:00
return nil
2019-03-01 03:47:50 +08:00
}