346 lines
8.6 KiB
Go
346 lines
8.6 KiB
Go
package main
|
|
|
|
import (
|
|
"crypto/tls"
|
|
"encoding/json"
|
|
"errors"
|
|
"flag"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"path"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/logrusorgru/aurora"
|
|
"golang.org/x/net/proxy"
|
|
"gopkg.in/matryer/try.v1"
|
|
)
|
|
|
|
type Post struct {
|
|
PreviewURL string `json:"preview_url"`
|
|
SampleURL string `json:"sample_url"`
|
|
FileURL string `json:"file_url"`
|
|
Directory int `json:"directory"`
|
|
Hash string `json:"hash"`
|
|
Height int `json:"height"`
|
|
ID int `json:"id"`
|
|
Image string `json:"image"`
|
|
Change int `json:"change"`
|
|
Owner string `json:"owner"`
|
|
ParentID int `json:"parent_id"`
|
|
Rating string `json:"rating"`
|
|
Sample int `json:"sample"`
|
|
SampleHeight int `json:"sample_height"`
|
|
SampleWidth int `json:"sample_width"`
|
|
Score int `json:"score"`
|
|
Tags string `json:"tags"`
|
|
Width int `json:"width"`
|
|
}
|
|
|
|
// SetProxy
|
|
// set http proxy in the format `http://HOST:PORT`
|
|
// set socket proxy in the format `socks5://HOST:PORT`
|
|
func SetProxy(proxyAddr string, timeout int) (http.Transport, error) {
|
|
if strings.HasPrefix(proxyAddr, "http") {
|
|
urlproxy, err := url.Parse(proxyAddr)
|
|
if err != nil {
|
|
return http.Transport{}, err
|
|
}
|
|
transport := &http.Transport{
|
|
Proxy: http.ProxyURL(urlproxy),
|
|
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
|
|
DialContext: (&net.Dialer{
|
|
Timeout: time.Duration(timeout) * time.Second,
|
|
}).DialContext,
|
|
}
|
|
return *transport, nil
|
|
}
|
|
if strings.HasPrefix(proxyAddr, "socks5") {
|
|
baseDialer := &net.Dialer{
|
|
Timeout: time.Duration(timeout) * time.Second,
|
|
KeepAlive: time.Duration(timeout) * time.Second,
|
|
}
|
|
socksHostPort := strings.ReplaceAll(proxyAddr, "socks5://", "")
|
|
dialSocksProxy, err := proxy.SOCKS5("tcp", socksHostPort, nil, baseDialer)
|
|
if err != nil {
|
|
return http.Transport{}, errors.New("error creating socks5 proxy :" + err.Error())
|
|
}
|
|
if contextDialer, ok := dialSocksProxy.(proxy.ContextDialer); ok {
|
|
dialContext := contextDialer.DialContext
|
|
transport := &http.Transport{
|
|
DialContext: dialContext,
|
|
}
|
|
return *transport, nil
|
|
} else {
|
|
return http.Transport{}, errors.New("failed type assertion to DialContext")
|
|
}
|
|
|
|
}
|
|
return http.Transport{}, errors.New("only support http(s) or socks5 protocol")
|
|
}
|
|
|
|
func GetPosts(tags string, page int, limit int, transport http.Transport) ([]Post, error) {
|
|
client := &http.Client{Transport: &transport}
|
|
posts := []Post{}
|
|
|
|
req, _ := http.NewRequest("GET", "https://api.rule34.xxx/index.php", nil)
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:6.1) Gecko/20100101 Firefox/6.1.9")
|
|
|
|
qs := req.URL.Query()
|
|
qs.Add("page", "dapi")
|
|
qs.Add("s", "post")
|
|
qs.Add("q", "index")
|
|
qs.Add("json", "1")
|
|
qs.Add("tags", tags)
|
|
qs.Add("limit", strconv.Itoa(limit))
|
|
qs.Add("pid", strconv.Itoa(page))
|
|
|
|
req.URL.RawQuery = qs.Encode()
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
defer resp.Body.Close()
|
|
|
|
body, err := ioutil.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
err = json.Unmarshal(body, &posts)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return posts, nil
|
|
}
|
|
|
|
// workState stores the state of all the jobs and
|
|
// is shared across workers
|
|
type workState struct {
|
|
Total int
|
|
Completed int
|
|
Successes int
|
|
Failures int
|
|
SaveDirectory string
|
|
}
|
|
|
|
// BeginDownload takes a slice of posts, a directory to save them in, and a
|
|
// number of concurrent workers to make. It blocks until all the post have
|
|
// been processed. It returns the number of successes, failures, and the total
|
|
// amount of posts.
|
|
func BeginDownload(posts *[]Post, saveDirectory *string, maxConcurrents *int, transport http.Transport) (*int, *int, *int) {
|
|
// Channel for main goroutine to give workers a post when they are done downloading one
|
|
wc := make(chan *Post)
|
|
|
|
var current int
|
|
|
|
total := len(*posts)
|
|
|
|
state := workState{
|
|
Total: total,
|
|
SaveDirectory: *saveDirectory,
|
|
}
|
|
|
|
// If we have more workers than posts, then we don't need all of them
|
|
if *maxConcurrents > total {
|
|
*maxConcurrents = total
|
|
}
|
|
|
|
for i := 0; i < *maxConcurrents; i++ {
|
|
// Create our workers
|
|
go work(i+1, &state, wc, transport)
|
|
|
|
// Give them their initial posts
|
|
wc <- &(*posts)[current]
|
|
current++
|
|
|
|
time.Sleep(time.Millisecond * 50)
|
|
}
|
|
|
|
for {
|
|
// Wait for a worker to be done (they send nil to wc)
|
|
<-wc
|
|
|
|
// If we finished downloading all posts, break out of the loop
|
|
if state.Successes+state.Failures == total {
|
|
break
|
|
}
|
|
|
|
// If there's no more posts to give, stop the worker
|
|
if current >= total {
|
|
wc <- nil
|
|
continue
|
|
}
|
|
|
|
// Give the worker the next post in the array
|
|
wc <- &(*posts)[current]
|
|
current++
|
|
}
|
|
|
|
return &state.Successes, &state.Failures, &total
|
|
}
|
|
|
|
func work(wn int, state *workState, wc chan *Post, transport http.Transport) {
|
|
for {
|
|
state.Completed++
|
|
|
|
// Wait for a post from main
|
|
post := <-wc
|
|
if post == nil { // nil means there aren't any more posts, so we're OK to break
|
|
return
|
|
}
|
|
|
|
progress := aurora.Sprintf(aurora.Green("[%d/%d]"), state.Completed, state.Total)
|
|
workerText := aurora.Sprintf(aurora.Cyan("[w%d]"), wn)
|
|
|
|
err := try.Do(func(attempt int) (retry bool, err error) {
|
|
if attempt == 1 {
|
|
fmt.Println(aurora.Sprintf(
|
|
"%s %s Downloading post %d -> %s",
|
|
progress,
|
|
workerText,
|
|
post.ID,
|
|
getSavePath(post, &state.SaveDirectory),
|
|
))
|
|
} else {
|
|
fmt.Println(aurora.Sprintf(
|
|
"%s %s (%d) Retry Download post %d -> %s",
|
|
progress,
|
|
workerText,
|
|
attempt,
|
|
post.ID,
|
|
getSavePath(post, &state.SaveDirectory),
|
|
))
|
|
}
|
|
|
|
err = downloadPost(post, state.SaveDirectory, transport)
|
|
return attempt < 5, err
|
|
})
|
|
|
|
if err != nil {
|
|
fmt.Printf("[w%d] Failed to download post %d: %v\n", wn, post.ID, err)
|
|
state.Failures++
|
|
} else {
|
|
state.Successes++
|
|
}
|
|
|
|
// Signal to main goroutine that we are done with this download
|
|
wc <- nil
|
|
}
|
|
}
|
|
|
|
func getSavePath(post *Post, directory *string) string {
|
|
savePath := path.Join(*directory, strconv.Itoa(post.ID)+path.Ext(post.FileURL))
|
|
return savePath
|
|
}
|
|
|
|
func downloadPost(post *Post, directory string, transport http.Transport) error {
|
|
savePath := getSavePath(post, &directory)
|
|
|
|
if _, err := os.Stat(savePath); err == nil {
|
|
fmt.Print("File exists, skip...\n")
|
|
} else {
|
|
resp, err := HTTPGet(post.FileURL, transport)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
defer resp.Body.Close()
|
|
|
|
body, err := ioutil.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
err = ioutil.WriteFile(savePath, body, 0755)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// HTTPGet is a helper function that automatically adds the
|
|
// tool's UA to an HTTP GET request
|
|
func HTTPGet(url string, transport http.Transport) (*http.Response, error) {
|
|
client := &http.Client{Transport: &transport}
|
|
|
|
req, _ := http.NewRequest("GET", url, nil)
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:6.1) Gecko/20100101 Firefox/6.1.9")
|
|
|
|
return client.Do(req)
|
|
}
|
|
|
|
func main() {
|
|
tags := flag.String("tags", "", "Tags to search for")
|
|
maxConcurrents := flag.Int("concurrents", 30, "Maximum amount of concurrent downloads")
|
|
saveDirectory := flag.String("out", "dl", "The directory to write the downloaded posts to")
|
|
postLimit := flag.Int("limit", 9999999999, "Maximum amount of posts to grab from rule34")
|
|
proxyAddr := flag.String("proxy", "", "Proxy address to parsing")
|
|
timeout := flag.Int("timeout", 10, "Timeout proxy to parsing")
|
|
|
|
flag.Parse()
|
|
|
|
transport, err := SetProxy(*proxyAddr, *timeout)
|
|
if err != nil {
|
|
fmt.Println("[WARN] Proxy is not using or proxy error!")
|
|
}
|
|
|
|
allPosts := []Post{}
|
|
nlimit := (*postLimit)
|
|
|
|
i := 0
|
|
for {
|
|
fmt.Printf("Fetching page %d...", i)
|
|
|
|
currentPostsCount := 100
|
|
|
|
nlimit = nlimit - currentPostsCount
|
|
if nlimit <= 0 {
|
|
currentPostsCount += nlimit
|
|
}
|
|
|
|
posts, err := GetPosts(*tags, i, currentPostsCount, transport)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
fmt.Printf(" fetched %d posts\n", len(posts))
|
|
|
|
if len(posts) == 0 {
|
|
break
|
|
}
|
|
|
|
allPosts = append(allPosts, posts...)
|
|
if nlimit <= 0 {
|
|
break
|
|
}
|
|
|
|
i++
|
|
}
|
|
|
|
fmt.Printf("Found %d posts. Starting download with %d workers...\n\n", len(allPosts), *maxConcurrents)
|
|
|
|
cwd, _ := os.Getwd()
|
|
absSaveDir := path.Join(cwd, *saveDirectory)
|
|
|
|
err = os.MkdirAll(absSaveDir, 0755)
|
|
if err != nil {
|
|
fmt.Printf("Cannot create output directory (%s). Do you have the right permissions?\n", absSaveDir)
|
|
os.Exit(1)
|
|
}
|
|
|
|
successes, failures, _ := BeginDownload(&allPosts, saveDirectory, maxConcurrents, transport)
|
|
|
|
fmt.Printf("\nAll done! %d posts downloaded and saved. (%d failed to download)\n", *successes, *failures)
|
|
}
|