libcurl: How to download a URL and keep its original file name? (equivalent of "-O / --remote-name")

Question 1: When a URL is downloaded using libcurl, how can the original name of the downloaded file be kept? libcurl asks the programmer to supply a filename. That is easy when the target name can be determined from the URL itself, for example vimqrc.pdf for the URL below.

 http://tnerual.eriogerg.free.fr/vimqrc.pdf

      

But when the URL generates the target name dynamically, the name cannot be read off the URL. For example, the URL below is saved as AdbeRdr1010_eu_ES.exe by both wget (no arguments except the URL) and curl (with the -O argument):

http://get.adobe.com/reader/download/?installer=Reader_10.1_Basque_for_Windows&standalone=1%22

      

How do curl (-O) and wget determine the name?

//invoked as ./a.out <URL>

#include <stdio.h>
#include <curl/curl.h>

char *location = "/tmp/test/out";

/*
 * Write callback registered with CURLOPT_WRITEFUNCTION.
 *
 * Forwards the received chunk (nmemb items of size bytes each, starting at
 * ptr) to stream and hands fwrite()'s result straight back to the caller.
 */
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) {
    return fwrite(ptr, size, nmemb, stream);
}

int main(int argc, char *argv[])
{
    CURL        *curl;
    CURLcode    res;
    int         ret = -1;


    if (argc!= 2) {
        //invoked as ./a.out <URL>
        return -1;
    } 

    curl = curl_easy_init();
    if (!curl) {
        goto bail;
    }

    FILE *fp = fopen(location, "wb");
    curl_easy_setopt(curl, CURLOPT_URL, argv[1]); //invoked as ./a.out <URL>
    /* example.com is redirected, so we tell libcurl to follow redirection */
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);

    /* Perform the request, res will get the return code */
    res = curl_easy_perform(curl);
    /* Check for errors */
    if(res != CURLE_OK)
        fprintf(stderr, "curl_easy_perform() failed: %s\n",
                curl_easy_strerror(res));

    /* always cleanup */
    curl_easy_cleanup(curl);
    ret = 0;
    fclose(fp);

bail:
    return ret;
}

      

+3


source to share


2 answers


I found the answer in the libcurl source code. It looks like the "remote name" is taken from the "Content-Disposition" header of the response. curl parses the header and looks for "filename=" in the Content-Disposition value. This parsing is done in the callback registered via the CURLOPT_HEADERFUNCTION option. Finally, in the callback for writing data (registered via CURLOPT_WRITEFUNCTION), this remote name is used to create the output file.

If the filename is missing from the header, it is easy to derive it from the URL itself. The code below is mostly copied from the curl sources, with minor modifications to make it simpler and to meet my requirements.



#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <curl/curl.h>

/*
 * State shared between the libcurl header callback and write callback for
 * one download. uint64_t comes from <stdint.h>; it is deliberately not
 * re-typedef'd here, since `unsigned long` is not 64 bits wide on every
 * platform and the name belongs to the standard header.
 */
typedef struct {
    char        dnld_remote_fname[4096]; /* output file name; empty until one is known */
    char        dnld_url[4096];          /* copy of the requested URL */
    FILE        *dnld_stream;            /* output file; NULL until it has been opened */
    FILE        *dbg_stream;             /* not used by the code shown here */
    uint64_t    dnld_file_sz;            /* running count maintained by the write callback */
} dnld_params_t;

/*
 * Extracts the file name from the value part of a Content-Disposition
 * header.
 *
 * cd    - NUL-terminated header text following the "Content-disposition:"
 *         tag; a trailing CR/LF is tolerated.
 * oname - destination buffer; must hold at least 4096 bytes (the size of
 *         dnld_params_t.dnld_remote_fname). It is written only on success.
 *
 * The "filename=" key is matched case-insensitively. The value ends at the
 * closing quote when quoted, otherwise at ';' or the end of the line.
 * Directory components are dropped so that a server-supplied name cannot
 * point outside the download directory. Over-long names are truncated.
 *
 * Returns 0 when a non-empty name was stored, -1 otherwise.
 */
static int get_oname_from_cd(char const*const cd, char *oname)
{
    enum { oname_cap = 4096 };
    char    const*const cdtag   = "Content-disposition:";
    char    const*const key     = "filename=";
    size_t  const       keylen  = strlen(key);
    char    const       *val    = NULL;
    size_t              len     = 0;

    /* Example Content-Disposition: filename=name1367; charset=funny; option=strange */

    if (!cd || !oname) {
        return -1;
    }

    /* Portable case-insensitive search for the key (no GNU strcasestr needed) */
    for (char const *p = cd; *p != '\0'; p++) {
        if (strncasecmp(p, key, keylen) == 0) {
            val = p + keylen;
            break;
        }
    }
    if (!val) {
        printf("No key-value for \"%s\" in \"%s\"", key, cdtag);
        return -1;
    }

    while (*val == ' ' || *val == '\t') {
        val++;
    }

    if (*val == '"') {
        /* Quoted value: everything up to the closing quote */
        val++;
        while (val[len] != '\0' && val[len] != '"' &&
               val[len] != '\r' && val[len] != '\n') {
            len++;
        }
    } else {
        /* Bare token: stops at the next parameter or at the line end */
        while (val[len] != '\0' && val[len] != ';' &&
               val[len] != '\r' && val[len] != '\n') {
            len++;
        }
        while (len > 0 && (val[len - 1] == ' ' || val[len - 1] == '\t')) {
            len--;
        }
    }

    /* Keep only the last path component of whatever the server sent */
    for (size_t i = len; i > 0; i--) {
        if (val[i - 1] == '/' || val[i - 1] == '\\') {
            val += i;
            len -= i;
            break;
        }
    }

    if (len == 0) {
        return -1;
    }
    if (len >= oname_cap) {
        len = oname_cap - 1;
    }

    memcpy(oname, val, len);
    oname[len] = '\0';
    return 0;
}

/*
 * Derives an output file name from a URL.
 *
 * url   - NUL-terminated URL; a leading "scheme://" is optional.
 * oname - destination buffer; must hold at least 4096 bytes (the size of
 *         dnld_params_t.dnld_remote_fname).
 *
 * The name is the text after the last '/' of the path, with any query
 * string ("?...") or fragment ("#...") removed. When the URL has no path,
 * or the path ends in '/', the name falls back to "index.html" so that the
 * caller always receives a usable file name. Over-long names are truncated.
 *
 * Returns 0 on success, -1 when url or oname is NULL.
 */
static int get_oname_from_url(char const* url, char *oname)
{
    enum { oname_cap = 4096 };
    char const *const   fallback = "index.html";
    char const          *path;
    char const          *slash   = NULL;
    char const          *name;
    size_t              path_len;
    size_t              name_len = 0;

    if (!oname) {
        return -1;
    }
    oname[0] = '\0';
    if (!url) {
        return -1;
    }

    /* Remove "http(s)://" when present; a URL without a scheme is used as is */
    path = strstr(url, "://");
    path = path ? path + strlen("://") : url;

    /* Only the part in front of the query string / fragment can name a file */
    path_len = strcspn(path, "?#");

    for (size_t i = 0; i < path_len; i++) {
        if (path[i] == '/') {
            slash = path + i;
        }
    }

    name = fallback;
    if (slash) {
        name_len = (size_t)((path + path_len) - (slash + 1));
        if (name_len > 0) {
            name = slash + 1;
        }
    }
    if (name_len == 0) {
        name_len = strlen(fallback);
    }
    if (name_len >= oname_cap) {
        name_len = oname_cap - 1;
    }

    memcpy(oname, name, name_len);
    oname[name_len] = '\0';
    return 0;
}

/*
 * Header callback registered with CURLOPT_HEADERFUNCTION; called once per
 * received header line.
 *
 * hdr      - header bytes; NOT guaranteed to be NUL-terminated, so they are
 *            only ever accessed with an explicit length here.
 * size,
 * nmemb    - the line is size * nmemb bytes long.
 * userdata - the dnld_params_t registered with CURLOPT_HEADERDATA.
 *
 * When the line is a Content-Disposition header, its value is copied into a
 * NUL-terminated scratch buffer and handed to get_oname_from_cd(), which
 * fills dnld_remote_fname.
 *
 * Always returns size * nmemb so that libcurl continues the transfer.
 */
size_t dnld_header_parse(void *hdr, size_t size, size_t nmemb, void *userdata)
{
    const   size_t  cb      = size * nmemb;
    const   char    *hdr_str= hdr;
    dnld_params_t *dnld_params = (dnld_params_t*)userdata;
    char const*const cdtag = "Content-disposition:";
    const   size_t  taglen  = strlen(cdtag);
    char    value[4096];
    size_t  value_len;

    /* Example:
     * ...
     * Content-Type: text/html
     * Content-Disposition: filename=name1367; charset=funny; option=strange
     */
    if (!hdr_str || !dnld_params || cb < taglen) {
        return cb;
    }

    /* Reads at most taglen (<= cb) bytes, so no terminator is required */
    if (strncasecmp(hdr_str, cdtag, taglen) != 0) {
        return cb;
    }

    /* Bounded print: the buffer may not end in '\0' */
    printf ("Found c-d: %.*s\n", (int)(cb > 4095 ? 4095 : cb), hdr_str);

    /* Make a terminated copy of the value for the string-based parser */
    value_len = cb - taglen;
    if (value_len >= sizeof(value)) {
        value_len = sizeof(value) - 1;
    }
    memcpy(value, hdr_str + taglen, value_len);
    value[value_len] = '\0';

    if (get_oname_from_cd(value, dnld_params->dnld_remote_fname)) {
        printf("ERR: bad remote name");
    }

    return cb;
}

/*
 * Opens the output file "/tmp/<fname>" for binary writing.
 *
 * fname - file name relative to /tmp; must be non-NULL and non-empty.
 *
 * Returns the open stream, or NULL when the name is missing, the resulting
 * path does not fit the internal buffer, or fopen() fails. The caller owns
 * the stream and must fclose() it.
 */
FILE* get_dnld_stream(char const*const fname)
{
    char const*const pre = "/tmp/";
    char out[4096];
    int n;

    if (!fname || fname[0] == '\0') {
        printf ("Could not create file: empty file name\n");
        return NULL;
    }

    /* pre already ends in '/', so no extra separator is inserted */
    n = snprintf(out, sizeof(out), "%s%s", pre, fname);
    if (n < 0 || (size_t)n >= sizeof(out)) {
        /* A truncated path would silently name a different file */
        printf ("Could not create file: path too long\n");
        return NULL;
    }

    FILE *fp = fopen(out, "wb");
    if (!fp) {
        printf ("Could not create file %s\n", out);
    }

    return fp;
}

/*
 * Write callback registered with CURLOPT_WRITEFUNCTION.
 *
 * buffer   - received body bytes.
 * sz,
 * nmemb    - the chunk is sz * nmemb bytes long.
 * userdata - the dnld_params_t registered with CURLOPT_WRITEDATA.
 *
 * On the first call the output file is created: if no remote name is known
 * yet, one is derived from the URL, and the stream is opened via
 * get_dnld_stream(). Every chunk is then appended to that stream and
 * dnld_file_sz is advanced by the number of bytes written.
 *
 * Returns the number of bytes written. A value different from sz * nmemb
 * (including 0 when the output file cannot be set up) makes libcurl abort
 * the transfer with a write error.
 */
size_t write_cb(void *buffer, size_t sz, size_t nmemb, void *userdata)
{
    dnld_params_t *dnld_params = (dnld_params_t*)userdata;
    size_t items;
    size_t bytes;

    if (!dnld_params) {
        return 0;
    }

    if (!dnld_params->dnld_remote_fname[0]) {
        if (get_oname_from_url(dnld_params->dnld_url, dnld_params->dnld_remote_fname) != 0) {
            return 0;
        }
    }

    if (!dnld_params->dnld_stream) {
        dnld_params->dnld_stream = get_dnld_stream(dnld_params->dnld_remote_fname);
        if (!dnld_params->dnld_stream) {
            /* No output file: stop the transfer rather than fwrite() to NULL */
            return 0;
        }
    }

    /* fwrite() counts items, libcurl expects bytes */
    items = fwrite(buffer, sz, nmemb, dnld_params->dnld_stream);
    bytes = items * sz;
    dnld_params->dnld_file_sz += bytes;

    return bytes;
}


/*
 * Downloads url into a file under /tmp whose name is taken from the
 * response's Content-Disposition header or, failing that, from the URL.
 *
 * The header callback (dnld_header_parse) records the remote name and the
 * write callback (write_cb) creates the file lazily and stores the body.
 *
 * Returns 0 when the transfer succeeded and the output file was closed
 * cleanly, -1 on any failure (bad URL, libcurl setup or transfer error,
 * failing fclose).
 */
int download_url(char const*const url)
{
    CURL        *curl = NULL;
    int         ret = -1;
    int         n;
    CURLcode    cerr = CURLE_OK;
    dnld_params_t dnld_params;

    memset(&dnld_params, 0, sizeof(dnld_params));

    if (!url) {
        printf ("Bad URL\n");
        return -1;
    }

    /* Bounded, always-terminated copy; refuse URLs that do not fit */
    n = snprintf(dnld_params.dnld_url, sizeof(dnld_params.dnld_url), "%s", url);
    if (n < 0 || (size_t)n >= sizeof(dnld_params.dnld_url)) {
        printf ("URL too long\n");
        return -1;
    }

    curl = curl_easy_init();
    if (!curl) {
        goto bail;
    }

    cerr = curl_easy_setopt(curl, CURLOPT_URL, url);
    if (cerr) { printf ("%s: failed with err %d\n", "URL", cerr); goto bail;}

    cerr = curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, dnld_header_parse);
    if (cerr) { printf ("%s: failed with err %d\n", "HEADER", cerr); goto bail;}

    cerr = curl_easy_setopt(curl, CURLOPT_HEADERDATA, &dnld_params);
    if (cerr) { printf ("%s: failed with err %d\n", "HEADER DATA", cerr); goto bail;}

    cerr = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
    if (cerr) { printf ("%s: failed with err %d\n", "WR CB", cerr); goto bail;}

    cerr = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &dnld_params);
    if (cerr) { printf ("%s: failed with err %d\n", "WR Data", cerr); goto bail;}


    cerr = curl_easy_perform(curl);
    if(cerr != CURLE_OK) {
        fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(cerr));
    } else {
        ret = 0;
    }

    printf ("Remote name: %s\n", dnld_params.dnld_remote_fname);
    printf ("file size : %lu\n", (unsigned long)dnld_params.dnld_file_sz);

bail:
    /* The stream stays NULL when no body was received; fclose(NULL) is undefined */
    if (dnld_params.dnld_stream && fclose(dnld_params.dnld_stream) != 0) {
        ret = -1;
    }

    /* always cleanup, also on the setopt error paths */
    if (curl) {
        curl_easy_cleanup(curl);
    }
    return ret;
}

/*
 * Entry point: expects exactly one argument, the URL to download, and
 * returns whatever download_url() reports for it. Any other argument count
 * prints "Bad args" and returns -1.
 */
int main(int argc, char *argv[])
{
    if (argc == 2) {
        return download_url(argv[1]);
    }

    printf ("Bad args\n");
    return -1;
}

      

+7


source


It is your program, not libcurl, that defines the filename. In your example, you can simply change char *location = "/tmp/test/out";

to char *location = "/tmp/test/vimqrc.pdf";

to get the effect you want.

If you want to build the path of the downloaded file programmatically from the URL and a parent directory, you can do something like the following:



/*
 * Builds the local path for a download: parent_directory followed by the
 * part of url after its last '/'. When url contains no '/', the whole url
 * is used as the file name.
 *
 * location         - destination buffer.
 * location_length  - size of location in bytes, including room for the
 *                    terminating '\0'.
 * url              - e.g. "http://tnerual.eriogerg.free.fr/vimqrc.pdf".
 * parent_directory - e.g. "/tmp/test/"; it is concatenated as is, so it
 *                    should end with a path separator.
 *
 * Returns the length of the resulting path, or -1 when an argument is NULL
 * or the path (plus terminator) does not fit into location; location is
 * left untouched in that case.
 */
int url_to_location(char* location, unsigned int location_length, const char* url, const char* parent_directory)
{
    const char *last_slash;
    const char *file_name;
    size_t parent_directory_length;
    size_t file_name_length;

    if (!location || !url || !parent_directory)
        return -1;

    /* use the characters of the url following the last slash as filename */
    last_slash = strrchr(url, '/');
    file_name = last_slash ? last_slash + 1 : url;

    parent_directory_length = strlen(parent_directory);
    file_name_length = strlen(file_name);

    /* reject when directory + name + '\0' exceed the buffer */
    if (parent_directory_length + file_name_length + 1 > location_length)
        return -1;

    memcpy(location, parent_directory, parent_directory_length);
    memcpy(location + parent_directory_length, file_name, file_name_length + 1);

    return (int)(parent_directory_length + file_name_length);
}

      

-2


source







All Articles