using .Pairs
using CodecZlib

"""
    safer_joinpath(basepart, parts...)
A variation on `joinpath`, that is more resistant to directory traversal attacks.
The parts to be joined (excluding the `basepart`),
are not allowed to contain `..`, or begin with a `/`.
If they do, then this throws a `DomainError`.
"""
function safer_joinpath(basepart, parts...)
    reason = "Possible directory traversal attack detected."
    # Only the trailing components are validated; `basepart` is trusted as given.
    foreach(parts) do component
        if occursin("..", component)
            # Rejects any occurrence of "..", not just a whole ".." path segment.
            throw(DomainError(component, "contains \"..\". $reason"))
        elseif startswith(component, '/')
            # A leading "/" would make `joinpath` discard everything before it.
            throw(DomainError(component, "begins with \"/\". $reason"))
        end
    end
    return joinpath(basepart, parts...)
end

"""
    try_get_filename_from_headers(hdrs)

Extract a filename from a collection of `Content-Disposition` header values.

The values in `hdrs` are searched in order for a `filename=` parameter. The first value
that contains one decides the result:
 - a quoted value (`filename="my file.txt"`) is unquoted and unescaped,
 - an unquoted value (`filename=file.txt`) ends at the next `;` and is stripped of
   surrounding whitespace.

Parameters that follow the filename (e.g. `; size=12`) are not part of the result.
The extended `filename*=` form is not matched by this function.

Returns the filename, or `nothing` if no value has a `filename=` parameter or the
first one found is empty.
"""
function try_get_filename_from_headers(hdrs)
    # extract out of Content-Disposition line
    # rough version of what is needed in https://github.com/JuliaWeb/HTTP.jl/issues/179
    #
    # Capture 1: the inside of a quoted-string (may contain backslash escapes).
    # Capture 2: a bare token, which stops at the next `;` parameter separator.
    filename_pattern = r"filename\s*=\s*(?:\"((?:[^\"\\]|\\.)*)\"|([^;]*))"
    for content_disp in hdrs
        found = match(filename_pattern, content_disp)
        found === nothing && continue
        filename = if found[1] !== nothing
            # It was in quotes, so it will be double escaped
            unescape_string(found[1])
        else
            strip(found[2])
        end
        # The first header carrying a `filename=` parameter is decisive, even if empty.
        return isempty(filename) ? nothing : filename
    end
    return nothing
end

"""
    try_get_filename_from_request(req)

Guess a filename from the target of a request.

The chain of `req.parent` links is followed back to the original request, and the last
non-empty path component of its target is used. If that gives nothing, the target of
`req` itself is tried the same way.

Returns the filename, or `nothing` if neither target has a usable path component.
"""
function try_get_filename_from_request(req)
    # The helper uses its own local names on purpose: assigning to a name that is also
    # a local of the enclosing function would share (and box) that variable instead of
    # creating a fresh one.
    function file_from_target(target)
        while !(target == "" || target == "/")
            # URI(...).path to strip out e.g. query parts
            candidate = basename(URI(target).path)
            candidate == "" || return candidate
            # Trailing separator: retry with the parent of the target.
            target = dirname(target)
        end
        return nothing
    end

    # First try to get file from the original request URI
    original = req
    while original.parent !== nothing
        original = original.parent.request
    end
    from_original = file_from_target(original.target)
    from_original !== nothing && return from_original

    # Secondly try to get file from the last request URI
    return file_from_target(req.target)
end


# No destination given: save into the temporary directory. Going through the directory
# method keeps the real filename where possible, because the extension is useful for
# FileIO.jl.
function determine_file(::Nothing, resp, hdrs)
    return determine_file(tempdir(), resp, hdrs)
end

"""
    determine_file(path, resp, hdrs)

Work out the file path that a download should be written to.

If `path` is an existing directory, a filename is chosen and joined onto it with
`safer_joinpath`. The candidates are tried in order, each only if the previous one
gave `nothing`:
 1. a filename from the `Content-Disposition` values in `hdrs`,
 2. a filename from the request of `resp`,
 3. a random name taken from `tempname()`.

Otherwise `path` is treated as the full file path and returned unchanged.
"""
function determine_file(path, resp, hdrs)
    # We have been given a full filepath
    isdir(path) || return path

    # We have been given a path to a directory, so we have to work out what file to put
    # there. Fallbacks are evaluated lazily, so the request is not inspected and no
    # temporary name is generated when an earlier candidate already gave a filename.
    filename = try_get_filename_from_headers(hdrs)
    if filename === nothing
        filename = try_get_filename_from_request(resp.request)
    end
    if filename === nothing
        # fallback, basically a random string
        filename = basename(tempname())
    end
    return safer_joinpath(path, filename)
end

"""
    download(url, [local_path], [headers]; update_period=1, kw...)

Similar to `Base.download` this downloads a file, returning the filename.
If the `local_path`:
 - is not provided, then it is saved in a temporary directory
 - is a path to an existing directory, then it is saved into that directory
 - otherwise the local path is used as the filename to save to.

When saving into a directory, the filename is determined (where possible)
from the `Content-Disposition` response header, or else from the request URL.

 - `update_period` controls how often (in seconds) to report the progress.
    - set to `Inf` to disable reporting
 - `headers` specifies headers to be used for the HTTP GET request
 - any additional keyword args (`kw...`) are passed on to the HTTP request.
"""
function download(url::AbstractString, local_path=nothing, headers=Header[]; update_period=1, kw...)
    # Formatting helpers for the fields of the progress log message.
    format_progress(x) = round(x, digits=4)
    # Non-finite byte counts (e.g. an unknown total) are shown as "∞ B".
    format_bytes(x) = !isfinite(x) ? "∞ B" : Base.format_bytes(round(Int, x))
    format_seconds(x) = "$(round(x; digits=2)) s"
    format_bytes_per_second(x) = format_bytes(x) * "/s"


    @debugv 1 "downloading $url"
    # Assigned inside the handler below, after the `eof` check has passed.
    # NOTE(review): if the handler returns early at the `eof` check every time it runs,
    # `file` is never assigned and the final `file` expression would fail. Confirm
    # whether a response with an empty body can reach this path.
    local file
    # `Content-Disposition` values collected by the handler, used for filename detection.
    hdrs = String[]
    HTTP.open("GET", url, headers; kw...) do stream
        resp = startread(stream)
        # Store intermediate header from redirects to use for filename detection
        content_disp = header(resp, "Content-Disposition")
        !isempty(content_disp) && push!(hdrs, content_disp)
        eof(stream) && return  # don't do anything for streams we can't read (yet)

        file = determine_file(local_path, resp, hdrs)
        # "NaN" is the fallback passed to `header`, presumably used when the response
        # has no Content-Length; the total is then unknown.
        total_bytes = parse(Float64, header(resp, "Content-Length", "NaN"))
        downloaded_bytes = 0
        start_time = now()
        prev_time = now()

        if header(resp, "Content-Encoding") == "gzip"
            stream = GzipDecompressorStream(stream) # auto decoding
            total_bytes = NaN # We don't know actual total bytes if the content is zipped.
        end

        # Log one progress record. Assigning `prev_time` here updates the variable of
        # the enclosing handler, which the write loop uses to decide when the next
        # report is due.
        function report_callback()
            prev_time = now()
            taken_time = (prev_time - start_time).value / 1000 # in seconds
            average_speed = downloaded_bytes / taken_time
            # `total_bytes` may be NaN (unknown); the values derived from it are then
            # NaN as well.
            remaining_bytes = total_bytes - downloaded_bytes
            remaining_time = remaining_bytes / average_speed
            completion_progress = downloaded_bytes / total_bytes

            @info("Downloading",
                  source=url,
                  dest = file,
                  progress = completion_progress |> format_progress,
                  time_taken = taken_time |> format_seconds,
                  time_remaining = remaining_time |> format_seconds,
                  average_speed = average_speed |> format_bytes_per_second,
                  downloaded = downloaded_bytes |> format_bytes,
                  remaining = remaining_bytes |> format_bytes,
                  total = total_bytes |> format_bytes,
                 )
        end

        # Copy the (possibly decompressed) stream to disk chunk by chunk, counting the
        # bytes written.
        Base.open(file, "w") do fh
            while(!eof(stream))
                downloaded_bytes += write(fh, readavailable(stream))
                if !isinf(update_period)
                    # `update_period` is in seconds; the comparison is in milliseconds.
                    if now() - prev_time > Millisecond(round(1000update_period))
                        report_callback()
                    end
                end
            end
        end
        # Final report once the whole body has been written (unless reporting is off).
        if !isinf(update_period)
            report_callback()
        end
    end
    file
end