Files
gomad/main.go
Itsigo 931b670336
All checks were successful
Build this app / Builder (push) Successful in 40s
removed download limit added simple mode for logfile
2026-03-09 10:40:15 +01:00

294 lines
6.4 KiB
Go

package main
import (
	"archive/zip"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"sync"

	"github.com/PuerkitoBio/goquery"
	"github.com/gocolly/colly/v2"
	"github.com/schollz/progressbar/v3"
)
// simplePtr is the -s flag: when true, progress bars are suppressed so
// the log output stays readable (e.g. when redirected to a logfile).
var simplePtr *bool

// pageInfo records the HTTP status of a scraped page.
// NOTE(review): Links is allocated in getAllChapters but never written
// to within this file — possibly vestigial; confirm before removing.
type pageInfo struct {
	StatusCode int
	Links      map[string]int
}

// series is a manga series: its display name and the chapters found
// on its full-chapter-list page.
type series struct {
	Name     string
	Chapters []chapter
}

// chapter is one downloadable chapter: a display name ("Chapter 012")
// and the absolute link to its reader page.
type chapter struct {
	Name string
	Link string
}
// extractSeriesInfo returns the series ID and the series title for a
// WeebCentral series URL. The ID is the segment after "/series/" in the
// URL itself; the title is scraped from the first <h1> inside the #top
// element of the page. If the URL has no "/series/" segment the ID is "".
func extractSeriesInfo(url string) (string, string) {
	// URL pattern: /series/{ID}/{name}
	c := colly.NewCollector()
	var name string
	c.OnHTML("#top", func(e *colly.HTMLElement) {
		name = e.DOM.Find("h1").First().Text()
	})
	// Log fetch failures instead of dropping them: previously a failed
	// visit silently returned an empty series name.
	if err := c.Visit(url); err != nil {
		log.Println("visiting series page:", err)
	}
	re := regexp.MustCompile(`/series/([^/]+)`)
	matches := re.FindStringSubmatch(url)
	if len(matches) < 2 {
		return "", name
	}
	return matches[1], name
}
// FormatChapterString zero-pads the integer part of a chapter number to
// three digits so lexicographic file sorting matches numeric order:
// "5" -> "005", "12.5" -> "012.5", "1234" -> "1234".
func FormatChapterString(s string) string {
	whole, frac, hasFrac := strings.Cut(s, ".")
	if missing := 3 - len(whole); missing > 0 {
		whole = strings.Repeat("0", missing) + whole
	}
	if !hasFrac {
		return whole
	}
	return whole + "." + frac
}
// getAllChapters scrapes the full chapter list for a series.
// single selects one specific chapter number; the sentinel 9999 (the -c
// flag default) means "all chapters".
func getAllChapters(seriesID string, single float64) []chapter {
	url := fmt.Sprintf("https://weebcentral.com/series/%s/full-chapter-list", seriesID)
	c := colly.NewCollector()
	p := &pageInfo{Links: make(map[string]int)}
	chapt := []chapter{}
	// Collect every chapter anchor on the page.
	c.OnHTML("div", func(e *colly.HTMLElement) {
		e.DOM.Find("a").Each(func(i int, h *goquery.Selection) {
			link, _ := h.Attr("href")
			// Label looks like "Chapter 12.5"; guard against anchors
			// without one — indexing [1] unconditionally panicked here.
			fields := strings.Fields(h.Find("span span").First().Text())
			if len(fields) < 2 {
				return
			}
			name := fields[1]
			// Keep every chapter, or only the one matching -c.
			if single == 9999 || name == fmt.Sprintf("%g", single) {
				chapt = append(chapt, chapter{
					Name: "Chapter " + FormatChapterString(name),
					Link: link,
				})
			}
		})
	})
	// Record the HTTP status for success and failure alike.
	c.OnResponse(func(r *colly.Response) {
		log.Println("response received", r.StatusCode)
		p.StatusCode = r.StatusCode
	})
	c.OnError(func(r *colly.Response, err error) {
		log.Println("error:", r.StatusCode, err)
		p.StatusCode = r.StatusCode
	})
	// Log transport-level failures instead of dropping them.
	if err := c.Visit(url); err != nil {
		log.Println("visiting chapter list:", err)
	}
	//slices.Reverse(chapt)
	return chapt
}
// wg gates the batched image downloads: downloadChapter adds one per
// spawned downloadImage goroutine and waits every 10 images.
var wg sync.WaitGroup
func downloadImage(url string, path string) {
c := colly.NewCollector()
c.OnResponse(func(r *colly.Response) {
defer wg.Done()
fullFileName := strings.Split(r.FileName(), "_")
newFileName := fullFileName[len(fullFileName)-1]
r.Save(path + newFileName)
})
c.Visit(url)
}
// downloadChapter downloads every page image of a chapter into
// chapterPath, fetching up to 10 images concurrently and showing a
// progress bar unless simple mode (-s) is on.
func downloadChapter(chapterPath string, url string) {
	// Surface directory-creation failures (an already-existing dir is
	// fine); the error was previously discarded.
	if err := os.Mkdir(chapterPath, 0775); err != nil && !os.IsExist(err) {
		log.Println("creating chapter dir:", err)
	}
	c := colly.NewCollector()
	count := 0
	c.OnHTML("section", func(h *colly.HTMLElement) {
		images := h.DOM.Find("img")
		imagesLength := images.Length()
		bar := progressbar.NewOptions(imagesLength,
			progressbar.OptionEnableColorCodes(true),
			progressbar.OptionFullWidth(),
			progressbar.OptionShowCount(),
			progressbar.OptionSetDescription("[cyan][1/2][reset] Downloading"),
			progressbar.OptionSetTheme(progressbar.Theme{
				Saucer:        "[green]=[reset]",
				SaucerHead:    "[green]>[reset]",
				SaucerPadding: " ",
				BarStart:      "[",
				BarEnd:        "]",
			}))
		images.Each(func(i int, s *goquery.Selection) {
			imgSrc, _ := s.Attr("src")
			if !*simplePtr {
				bar.Add(1)
			}
			wg.Add(1)
			count++
			go downloadImage(imgSrc, chapterPath+"/")
			// Batch: wait after every 10 in-flight images (and at the
			// end) so at most 10 downloads run at the same time.
			if count == 10 || i == imagesLength-1 {
				wg.Wait()
				count = 0
			}
		})
	})
	// Log fetch failures instead of dropping them.
	if err := c.Visit(fmt.Sprint(url, "/images?is_prev=False&current_page=1&reading_style=long_strip")); err != nil {
		log.Println("visiting chapter images:", err)
	}
}
// compressToCbz packs every file in chapterPath into a sibling
// "<chapterPath>.cbz" zip archive, showing a progress bar unless simple
// mode (-s) is on. It panics on I/O errors, matching the file's
// existing error style.
func compressToCbz(chapterPath string) {
	archive, err := os.Create(chapterPath + ".cbz")
	if err != nil {
		panic(err)
	}
	defer archive.Close()
	zipWriter := zip.NewWriter(archive)
	// Check the listing error: previously a failed ReadDir was ignored
	// and silently produced an empty archive.
	entries, err := os.ReadDir(chapterPath)
	if err != nil {
		panic(err)
	}
	bar := progressbar.NewOptions(len(entries),
		progressbar.OptionEnableColorCodes(true),
		progressbar.OptionFullWidth(),
		progressbar.OptionShowCount(),
		progressbar.OptionSetDescription("[magenta][2/2][reset] Compressing"),
		progressbar.OptionSetTheme(progressbar.Theme{
			Saucer:        "[green]=[reset]",
			SaucerHead:    "[green]>[reset]",
			SaucerPadding: " ",
			BarStart:      "[",
			BarEnd:        "]",
		}))
	for _, e := range entries {
		if !*simplePtr {
			bar.Add(1)
		}
		// The helper scopes each file handle to one iteration; the old
		// loop deferred every Close until function return, holding all
		// descriptors open at once.
		if err := addFileToZip(zipWriter, chapterPath, e.Name()); err != nil {
			panic(err)
		}
	}
	fmt.Println()
	// Close flushes the central directory; an ignored error here could
	// leave a truncated, unreadable .cbz.
	if err := zipWriter.Close(); err != nil {
		panic(err)
	}
}

// addFileToZip copies the file dir/name into the open zip writer.
func addFileToZip(zw *zip.Writer, dir string, name string) error {
	f, err := os.Open(dir + "/" + name)
	if err != nil {
		return err
	}
	defer f.Close()
	w, err := zw.Create(name)
	if err != nil {
		return err
	}
	_, err = io.Copy(w, f)
	return err
}
// downloadSeries downloads the chapters of the series at url into
// downloadPath, writing one .cbz per chapter and skipping chapters
// whose .cbz already exists. single selects one chapter number; 9999
// (the flag default) means all chapters.
func downloadSeries(url string, downloadPath string, single float64) {
	log.Println("Downloading from", url)
	seriesID, seriesName := extractSeriesInfo(url)
	s := series{
		Name:     seriesName,
		Chapters: getAllChapters(seriesID, single),
	}
	log.Printf("Found %d Chapters\n", len(s.Chapters))
	// filepath.Join inserts the separator that the old "%s%s" concat
	// dropped whenever -path had no trailing slash.
	seriesPath := filepath.Join(downloadPath, seriesName)
	if err := os.Mkdir(seriesPath, 0775); err != nil && !os.IsExist(err) {
		log.Println("creating series dir:", err)
	}
	log.Println("Starting to download", seriesName)
	downloadCount := 0
	for _, c := range s.Chapters {
		chapterPath := filepath.Join(seriesPath, c.Name)
		// Only download chapters not already archived as .cbz.
		if _, err := os.Stat(chapterPath + ".cbz"); os.IsNotExist(err) {
			log.Println("Downloading ", c.Name)
			downloadChapter(chapterPath, c.Link)
			compressToCbz(chapterPath)
			// The loose image directory is only an intermediate.
			os.RemoveAll(chapterPath)
			downloadCount++
		}
	}
	if downloadCount == 0 {
		log.Println("No new Chapters downloaded")
	}
}
// main parses the CLI flags and downloads either a single series (-url)
// or every series listed one-URL-per-line in a file (-file).
func main() {
	urlPtr := flag.String("url", "", "WeebCentral URL")
	dlPathPtr := flag.String("path", "", "Download Path")
	filePtr := flag.String("file", "", "Path to file for multi download")
	chapterPtr := flag.Float64("c", 9999, "Specific chapter")
	simplePtr = flag.Bool("s", false, "Simple output for readable logs")
	flag.Parse()
	if *urlPtr == "" && *filePtr == "" {
		fmt.Println("Please use a url to use this command")
		os.Exit(1)
	}
	// -path is deliberately optional: an empty path downloads into the
	// current directory.
	if *filePtr != "" {
		dat, err := os.ReadFile(*filePtr)
		if err != nil {
			fmt.Println("Could not find file")
			// Previously execution fell through with nil data and
			// "downloaded" a single empty URL.
			os.Exit(1)
		}
		paths := strings.Split(strings.TrimRight(string(dat), "\n"), "\n")
		for _, p := range paths {
			// Tolerate blank lines in the list file.
			if p == "" {
				continue
			}
			downloadSeries(p, *dlPathPtr, *chapterPtr)
		}
		return
	}
	downloadSeries(*urlPtr, *dlPathPtr, *chapterPtr)
}