/********************************************************************** * $Id$ * * Project: MapServer * Purpose: Utility functions to access files via HTTP (requires libcurl) * Author: Daniel Morissette, DM Solutions Group (morissette@dmsolutions.ca) * ********************************************************************** * Copyright (c) 2001-2003, Daniel Morissette, DM Solutions Group Inc * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies of this Software or works derived from this Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ****************************************************************************/ /* For now this code is enabled only when WMS/WFS client is enabled. * This should be changed to a test on the presence of libcurl which * is really what the real dependency is. */ #if defined(USE_WMS_LYR) || defined(USE_WFS_LYR) #include "mapserver.h" #include "maperror.h" #include "mapows.h" #include "mapthread.h" MS_CVSID("$Id$") #include #ifndef _WIN32 #include #include #endif /* * Note: This code uses libcurl to access remote files via the HTTP protocol. 
* Requires libcurl v 7.10 or more recent. * See http://curl.haxx.se/libcurl/c/ for the lib source code and docs. */ #include /********************************************************************** * msHTTPInit() * * This function is called to init libcurl before the first HTTP request * in this process is executed. * On further calls (when gbCurlInitialized = MS_TRUE) it simply doest nothing. * * Returns MS_SUCCESS/MS_FAILURE. * * msHTTPCleanup() will have to be called in msCleanup() when this process * exits. **********************************************************************/ static int gbCurlInitialized = MS_FALSE; int msHTTPInit() { /* curl_global_init() should only be called once (no matter how * many threads or libcurl sessions that'll be used) by every * application that uses libcurl. */ msAcquireLock(TLOCK_OWS); if (!gbCurlInitialized && curl_global_init(CURL_GLOBAL_ALL) != 0) { msReleaseLock(TLOCK_OWS); msSetError(MS_HTTPERR, "Libcurl initialization failed.", "msHTTPInit()"); return MS_FAILURE; } gbCurlInitialized = MS_TRUE; msReleaseLock(TLOCK_OWS); return MS_SUCCESS; } /********************************************************************** * msHTTPCleanup() * **********************************************************************/ void msHTTPCleanup() { msAcquireLock(TLOCK_OWS); if (gbCurlInitialized) curl_global_cleanup(); gbCurlInitialized = MS_FALSE; msReleaseLock(TLOCK_OWS); } /********************************************************************** * msHTTPInitRequestObj() * * Should be called on a new array of httpRequestObj to initialize them * for use with msHTTPExecuteRequest(), etc. 
* * Note that users of this module should always allocate and init one * more instance of httpRequestObj in their array than what they plan to * use because the terminate_handler() needs the last entry in the array * to have reqObj->request == NULL * **********************************************************************/ void msHTTPInitRequestObj(httpRequestObj *pasReqInfo, int numRequests) { int i; /* NOTE(review): this copy of the file is incomplete from the for statement below up to the debug test that follows it. The remainder of msHTTPInitRequestObj() and the beginning of the function that logs itself as msHTTPWriteFct are missing; msHTTPFreeRequestObj(), which is called elsewhere in this file, is presumably part of the lost text as well. Restore this span from a pristine copy before compiling or documenting it further. */ for(i=0; idebug) { /* NOTE(review): size*nmemb is printed with %d; the declarations of size and nmemb are not visible here, so confirm that the format matches their type. */ msDebug("msHTTPWriteFct(id=%d, %d bytes)\n", psReq->nLayerId, size*nmemb); } /* Hand the received data to fwrite() on the request's output file and return fwrite()'s result */ return fwrite(buffer, size, nmemb, psReq->fp); } /********************************************************************** * msGetCURLAuthType() * * Returns the equivalent CURL CURLAUTH_ constant given a * MS_HTTP_AUTH_TYPE, or CURLAUTH_BASIC if no match is found. * * MS_BASIC, MS_DIGEST, MS_NTLM, MS_ANY and MS_ANYSAFE each map to the * CURLAUTH_ constant of the same name; the return type is long, which * is how callers in this file store it before handing it to * curl_easy_setopt(). **********************************************************************/ long msGetCURLAuthType(enum MS_HTTP_AUTH_TYPE authType) { switch (authType) { case MS_BASIC: return CURLAUTH_BASIC; case MS_DIGEST: return CURLAUTH_DIGEST; case MS_NTLM: return CURLAUTH_NTLM; case MS_ANY: return CURLAUTH_ANY; case MS_ANYSAFE: return CURLAUTH_ANYSAFE; /* Unknown values fall back to basic authentication */ default: return CURLAUTH_BASIC; } } /********************************************************************** * msHTTPExecuteRequests() * * Fetch a map slide via HTTP request and save to specified temp file. * * If bCheckLocalCache==MS_TRUE then if the pszOutputfile already exists * then is is not downloaded again, and status 242 is returned. * * Return value: * MS_SUCCESS if all requests completed succesfully. 
* MS_FAILURE if a fatal error happened * MS_DONE if some requests failed with 40x status for instance (not fatal) **********************************************************************/ /* Parameters: pasReqInfo is the array of request objects (this function reads their settings and writes fp, pszErrBuf, nStatus and pszContentType, and resets curl_handle to NULL once a handle has been released); numRequests is the number of entries to process, 0 returning MS_SUCCESS immediately; bCheckLocalCache is described in the header comment, the code that consumes it is not visible in this copy of the file. NOTE(review): several for statements in this function are incomplete in this copy (text appears to have been lost between the loop initializer and a later expression). Each such place is flagged below and has to be restored from a pristine copy before the function can be compiled or fully documented. */ int msHTTPExecuteRequests(httpRequestObj *pasReqInfo, int numRequests, int bCheckLocalCache) { int i, nStatus = MS_SUCCESS, nTimeout, still_running=0, num_msgs=0; CURLM *multi_handle; CURLMsg *curl_msg; char debug = MS_FALSE; const char *pszCurlCABundle = NULL; if (numRequests == 0) return MS_SUCCESS; /* Nothing to do */ /* NOTE(review): the return value of msHTTPInit() is ignored here. */ if (!gbCurlInitialized) msHTTPInit(); /* Establish the timeout (seconds) for how long we are going to wait * for a response. * We use the longest timeout value in the array of requests */ nTimeout = pasReqInfo[0].nTimeout; /* NOTE(review): incomplete for statement follows (loop condition and start of the loop body are missing). */ for (i=0; i nTimeout) nTimeout = pasReqInfo[i].nTimeout; if (pasReqInfo[i].debug) debug = MS_TRUE; /* For the download loop */ } /* Fall back to 30 seconds when no positive timeout was requested */ if (nTimeout <= 0) nTimeout = 30; /* Check if we've got a CURL_CA_BUNDLE env. var. * If set then the value is the full path to the ca-bundle.crt file * e.g. CURL_CA_BUNDLE=/usr/local/share/curl/curl-ca-bundle.crt */ pszCurlCABundle = getenv("CURL_CA_BUNDLE"); if (debug) { msDebug("HTTP: Starting to prepare HTTP requests.\n"); if (pszCurlCABundle) msDebug("Using CURL_CA_BUNDLE=%s\n", pszCurlCABundle); } /* Alloc a curl-multi handle, and add a curl-easy handle to it for each * file to download. */ multi_handle = curl_multi_init(); if (multi_handle == NULL) { msSetError(MS_HTTPERR, "curl_multi_init() failed.", "msHTTPExecuteRequests()"); return(MS_FAILURE); } /* NOTE(review): incomplete for statement follows; the start of the per-request setup loop, including the declarations of http_handle, fp and psCurlVInfo used below, is missing. */ for (i=0; iversion_num/0x10000 & 0xff, psCurlVInfo->version_num/0x100 & 0xff, psCurlVInfo->version_num & 0xff ); } } /* A caller-supplied user agent is passed on when set */ if (pasReqInfo[i].pszUserAgent) { curl_easy_setopt(http_handle, CURLOPT_USERAGENT, pasReqInfo[i].pszUserAgent ); } /* Enable following redirections. 
Requires libcurl 7.10.1 at least */ /* NOTE(review): libcurl documents CURLOPT_FOLLOWLOCATION, CURLOPT_MAXREDIRS and CURLOPT_TIMEOUT as taking a long, while 1, 10 and nTimeout are int here - confirm and consider passing long values. */ curl_easy_setopt(http_handle, CURLOPT_FOLLOWLOCATION, 1 ); curl_easy_setopt(http_handle, CURLOPT_MAXREDIRS, 10 ); /* Set timeout (seconds); the same nTimeout value is applied to every request. */ curl_easy_setopt(http_handle, CURLOPT_TIMEOUT, nTimeout ); /* Pass CURL_CA_BUNDLE if set */ if (pszCurlCABundle) curl_easy_setopt(http_handle, CURLOPT_CAINFO, pszCurlCABundle ); /* Set proxying settings */ if (pasReqInfo[i].pszProxyAddress != NULL && strlen(pasReqInfo[i].pszProxyAddress) > 0) { long nProxyType = CURLPROXY_HTTP; curl_easy_setopt(http_handle, CURLOPT_PROXY, pasReqInfo[i].pszProxyAddress); /* The port is only passed on when it is in 1..65534. NOTE(review): 65535 is rejected by this test - confirm that is intended. */ if (pasReqInfo[i].nProxyPort > 0 && pasReqInfo[i].nProxyPort < 65535) { curl_easy_setopt(http_handle, CURLOPT_PROXYPORT, pasReqInfo[i].nProxyPort); } /* Values of eProxyType other than MS_HTTP and MS_SOCKS5 keep the CURLPROXY_HTTP default */ switch (pasReqInfo[i].eProxyType) { case MS_HTTP: nProxyType = CURLPROXY_HTTP; break; case MS_SOCKS5: nProxyType = CURLPROXY_SOCKS5; break; } curl_easy_setopt(http_handle, CURLOPT_PROXYTYPE, nProxyType); /* If there is proxy authentication information, set it */ if (pasReqInfo[i].pszProxyUsername != NULL && pasReqInfo[i].pszProxyPassword != NULL && strlen(pasReqInfo[i].pszProxyUsername) > 0 && strlen(pasReqInfo[i].pszProxyPassword) > 0) { char szUsernamePasswd[128]; #ifdef CURLOPT_PROXYAUTH long nProxyAuthType = CURLAUTH_BASIC; /* CURLOPT_PROXYAUTH available only in Curl 7.10.7 and up */ nProxyAuthType = msGetCURLAuthType(pasReqInfo[i].eProxyAuthType); curl_easy_setopt(http_handle, CURLOPT_PROXYAUTH, nProxyAuthType); #else /* We log an error but don't abort processing */ msSetError(MS_HTTPERR, "CURLOPT_PROXYAUTH not supported. Requires Curl 7.10.7 and up. 
*_proxy_auth_type setting ignored.", "msHTTPExecuteRequests()"); #endif /* CURLOPT_PROXYAUTH */ /* The user:password pair is formatted with a 127-byte limit, so longer credentials are silently truncated. NOTE(review): szUsernamePasswd goes out of scope at the end of this block, before the transfer runs; verify that every libcurl version supported here copies the CURLOPT_PROXYUSERPWD string. */ snprintf(szUsernamePasswd, 127, "%s:%s", pasReqInfo[i].pszProxyUsername, pasReqInfo[i].pszProxyPassword); curl_easy_setopt(http_handle, CURLOPT_PROXYUSERPWD, szUsernamePasswd); } } /* Set HTTP Authentication settings */ if (pasReqInfo[i].pszHttpUsername != NULL && pasReqInfo[i].pszHttpPassword != NULL && strlen(pasReqInfo[i].pszHttpUsername) > 0 && strlen(pasReqInfo[i].pszHttpPassword) > 0) { char szUsernamePasswd[128]; long nHttpAuthType = CURLAUTH_BASIC; /* Same 127-byte limit and buffer lifetime caveat as for the proxy credentials */ snprintf(szUsernamePasswd, 127, "%s:%s", pasReqInfo[i].pszHttpUsername, pasReqInfo[i].pszHttpPassword); curl_easy_setopt(http_handle, CURLOPT_USERPWD, szUsernamePasswd); nHttpAuthType = msGetCURLAuthType(pasReqInfo[i].eHttpAuthType); curl_easy_setopt(http_handle, CURLOPT_HTTPAUTH, nHttpAuthType); } /* NOSIGNAL should be set to true for timeout to work in multithread * environments on Unix, requires libcurl 7.10 or more recent. * (this avoids the use of signal handlers) */ #ifdef CURLOPT_NOSIGNAL curl_easy_setopt(http_handle, CURLOPT_NOSIGNAL, 1 ); #endif /* Open output file and set write handler */ if ( (fp = fopen(pasReqInfo[i].pszOutputFile, "wb")) == NULL) { msSetError(MS_HTTPERR, "Can't open output file %s.", "msHTTPExecuteRequests()", pasReqInfo[i].pszOutputFile); /* NOTE(review): this return skips curl_multi_cleanup(multi_handle) and the loop further down that closes the files already opened for earlier requests. */ return(MS_FAILURE); } pasReqInfo[i].fp = fp; /* The request object itself is the user data handed to msHTTPWriteFct */ curl_easy_setopt(http_handle, CURLOPT_WRITEDATA, &(pasReqInfo[i])); curl_easy_setopt(http_handle, CURLOPT_WRITEFUNCTION, msHTTPWriteFct); /* Provide a buffer where libcurl can write human readable error msgs */ /* NOTE(review): the malloc() result is written to on the next statement without a NULL check. */ if (pasReqInfo[i].pszErrBuf == NULL) pasReqInfo[i].pszErrBuf = (char *)malloc((CURL_ERROR_SIZE+1)* sizeof(char)); pasReqInfo[i].pszErrBuf[0] = '\0'; curl_easy_setopt(http_handle, CURLOPT_ERRORBUFFER, pasReqInfo[i].pszErrBuf); /* POST requests: send pszPostRequest as the body, with a Content-Type header built from pszPostContentType in a 100-byte buffer */ if(pasReqInfo[i].pszPostRequest != NULL ) { char szBuf[100]; struct curl_slist *headers=NULL; snprintf(szBuf, 100, "Content-Type: %s", pasReqInfo[i].pszPostContentType); 
headers = curl_slist_append(headers, szBuf); curl_easy_setopt(http_handle, CURLOPT_POST, 1 ); curl_easy_setopt(http_handle, CURLOPT_POSTFIELDS, pasReqInfo[i].pszPostRequest); curl_easy_setopt(http_handle, CURLOPT_HTTPHEADER, headers); /* curl_slist_free_all(headers); */ /* free the header list */ /* NOTE(review): with the call above commented out, nothing visible in this function frees the header list. */ } /* Added by RFC-42 HTTP Cookie Forwarding */ if(pasReqInfo[i].pszHTTPCookieData != NULL) { /* Check if there's no end of line in the Cookie string */ /* This could break the HTTP Header */ /* NOTE(review): incomplete for statement follows; the rest of the cookie handling and everything up to the test on curl_msg (presumably including the download loop) is missing. */ int nPos; for(nPos=0; nPosmsg == CURLMSG_DONE && curl_msg->data.result != CURLE_OK) { /* Something went wrong with this transfer... report error */ /* NOTE(review): incomplete for statement follows; the search that matches a request to curl_msg's easy_handle is only partly visible. */ for (i=0; ieasy_handle) { psReq = &(pasReqInfo[i]); break; } } if (psReq != NULL) { /* Record error code in nStatus as a negative value */ psReq->nStatus = -curl_msg->data.result; } } } if (debug) { /* Print a msDebug header for timings reported in the loop below */ msDebug("msHTTPExecuteRequests() timing summary per layer (connect_time + time_to_first_packet + download_time = total_time in seconds)\n"); } /* Check status of all requests, close files, report errors and cleanup * handles */ /* NOTE(review): incomplete for statement follows; the declarations of psReq, http_handle and lVal used in this loop are missing. */ for (i=0; inStatus == 242) continue; /* Nothing to do here, this file was in cache already */ if (psReq->fp) fclose(psReq->fp); psReq->fp = NULL; http_handle = (CURL*)(psReq->curl_handle); /* A request with no curl error recorded gets the HTTP response code as its status */ if (psReq->nStatus == 0 && curl_easy_getinfo(http_handle, CURLINFO_HTTP_CODE, &lVal) == CURLE_OK) { char *pszContentType = NULL; psReq->nStatus = lVal; /* Fetch content type of response */ if (curl_easy_getinfo(http_handle, CURLINFO_CONTENT_TYPE, &pszContentType) == CURLE_OK && pszContentType != NULL) { psReq->pszContentType = strdup(pszContentType); } } if (!MS_HTTP_SUCCESS(psReq->nStatus)) { /* Set status to MS_DONE to indicate that transfers were */ /* completed but may not be successful */ nStatus = MS_DONE; if (psReq->nStatus == -(CURLE_OPERATION_TIMEOUTED)) { /* Timeout isn't a fatal error */ if (psReq->debug) msDebug("HTTP: TIMEOUT of %d seconds exceeded for 
%s\n", nTimeout, psReq->pszGetUrl ); msSetError(MS_HTTPERR, "HTTP: TIMEOUT of %d seconds exceeded for %s\n", "msHTTPExecuteRequests()", nTimeout, psReq->pszGetUrl); /* Rewrite error message, the curl timeout message isn't * of much use to our users. */ sprintf(psReq->pszErrBuf, "TIMEOUT of %d seconds exceeded.", nTimeout); } else if (psReq->nStatus > 0) { /* Got an HTTP Error, e.g. 404, etc. */ if (psReq->debug) msDebug("HTTP: HTTP GET request failed with status %d (%s)" " for %s\n", psReq->nStatus, psReq->pszErrBuf, psReq->pszGetUrl); msSetError(MS_HTTPERR, "HTTP GET request failed with status %d (%s) " "for %s", "msHTTPExecuteRequests()", psReq->nStatus, psReq->pszErrBuf, psReq->pszGetUrl); } else { /* Got a curl error; nStatus holds the negated curl result code, so it is negated again for reporting */ if (psReq->debug) msDebug("HTTP: request failed with curl error " "code %d (%s) for %s", -psReq->nStatus, psReq->pszErrBuf, psReq->pszGetUrl); msSetError(MS_HTTPERR, "HTTP: request failed with curl error " "code %d (%s) for %s", "msHTTPExecuteRequests()", -psReq->nStatus, psReq->pszErrBuf, psReq->pszGetUrl); } } /* Report download times foreach handle, in debug mode */ if (psReq->debug) { double dConnectTime=0.0, dTotalTime=0.0, dStartTfrTime=0.0; curl_easy_getinfo(http_handle, CURLINFO_CONNECT_TIME, &dConnectTime); curl_easy_getinfo(http_handle, CURLINFO_STARTTRANSFER_TIME, &dStartTfrTime); curl_easy_getinfo(http_handle, CURLINFO_TOTAL_TIME, &dTotalTime); /* STARTTRANSFER_TIME includes CONNECT_TIME, but TOTAL_TIME * doesn't, so we need to add it. 
*/ dTotalTime += dConnectTime; msDebug("Layer %d: %.3f + %.3f + %.3f = %.3fs\n", psReq->nLayerId, dConnectTime, dStartTfrTime-dConnectTime, dTotalTime-dStartTfrTime, dTotalTime); } /* Cleanup this handle */ curl_easy_setopt(http_handle, CURLOPT_URL, "" ); curl_multi_remove_handle(multi_handle, http_handle); curl_easy_cleanup(http_handle); psReq->curl_handle = NULL; } /* Cleanup multi handle, each handle had to be cleaned up individually */ curl_multi_cleanup(multi_handle); return nStatus; } /********************************************************************** * msHTTPGetFile() * * Wrapper to call msHTTPExecuteRequests() for a single file. **********************************************************************/ int msHTTPGetFile(const char *pszGetUrl, const char *pszOutputFile, int *pnHTTPStatus, int nTimeout, int bCheckLocalCache, int bDebug) { httpRequestObj *pasReqInfo; /* Alloc httpRequestInfo structs through which status of each request * will be returned. * We need to alloc 2 instance of requestobj so that the last * object in the array can be set to NULL. */ pasReqInfo = (httpRequestObj*)calloc(2, sizeof(httpRequestObj)); msHTTPInitRequestObj(pasReqInfo, 2); pasReqInfo[0].pszGetUrl = strdup(pszGetUrl); pasReqInfo[0].pszOutputFile = strdup(pszOutputFile); pasReqInfo[0].debug = (char)bDebug; if (msHTTPExecuteRequests(pasReqInfo, 1, bCheckLocalCache) != MS_SUCCESS) { *pnHTTPStatus = pasReqInfo[0].nStatus; if (pasReqInfo[0].debug) msDebug("HTTP request failed for %s.\n", pszGetUrl); msHTTPFreeRequestObj(pasReqInfo, 2); free(pasReqInfo); return MS_FAILURE; } *pnHTTPStatus = pasReqInfo[0].nStatus; msHTTPFreeRequestObj(pasReqInfo, 2); free(pasReqInfo); return MS_SUCCESS; } #endif /* defined(USE_WMS_LYR) || defined(USE_WMS_SVR) */