/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // --------------------------------------------------------------------------- // Includes // --------------------------------------------------------------------------- #include #include #include #include #include XERCES_CPP_NAMESPACE_BEGIN // --------------------------------------------------------------------------- // constants // --------------------------------------------------------------------------- static const int BASELENGTH = 255; static const int FOURBYTE = 4; // --------------------------------------------------------------------------- // class data member // --------------------------------------------------------------------------- // the base64 alphabet according to definition in RFC 2045 const XMLByte Base64::base64Alphabet[] = { chLatin_A, chLatin_B, chLatin_C, chLatin_D, chLatin_E, chLatin_F, chLatin_G, chLatin_H, chLatin_I, chLatin_J, chLatin_K, chLatin_L, chLatin_M, chLatin_N, chLatin_O, chLatin_P, chLatin_Q, chLatin_R, chLatin_S, chLatin_T, chLatin_U, chLatin_V, chLatin_W, chLatin_X, chLatin_Y, chLatin_Z, chLatin_a, chLatin_b, chLatin_c, chLatin_d, chLatin_e, chLatin_f, chLatin_g, chLatin_h, chLatin_i, chLatin_j, chLatin_k, chLatin_l, chLatin_m, chLatin_n, chLatin_o, chLatin_p, chLatin_q, chLatin_r, chLatin_s, chLatin_t, chLatin_u, chLatin_v, chLatin_w, chLatin_x, chLatin_y, chLatin_z, chDigit_0, chDigit_1, chDigit_2, chDigit_3, chDigit_4, chDigit_5, chDigit_6, chDigit_7, chDigit_8, chDigit_9, chPlus, chForwardSlash, chNull }; // This is an inverse table for base64 decoding. So, if // base64Alphabet[17] = 'R', then base64Inverse['R'] = 17. // // For characters not in base64Alphabet then // base64Inverse[ch] = 0xFF. const XMLByte Base64::base64Inverse[BASELENGTH] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3E, 0xFF, 0xFF, 0xFF, 0x3F, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; const XMLByte Base64::base64Padding = chEqual; /*** * * Memory Management Issue: * * . For memory allocated for result returned to caller (external memory), * the plugged memory manager is used if it is provided, otherwise global * new used to retain the pre-memory-manager behaviour. * * . For memory allocated for temperary buffer (internal memory), * XMLPlatformUtils::fgMemoryManager is used. * */ static void* getExternalMemory( MemoryManager* const allocator , XMLSize_t const sizeToAllocate) { return allocator ? allocator->allocate(sizeToAllocate) : ::operator new(sizeToAllocate); } /*** * internal memory is deallocated by janitorArray */ static void returnExternalMemory( MemoryManager* const allocator , void* buffer) { allocator ? allocator->deallocate(buffer) : ::operator delete(buffer); } /** * E2-9 * * Canonical-base64Binary ::= (B64line* B64lastline)? * * B64line ::= B64x15 B64x15 B64x15 B64x15 B64x15 B64 #xA * 76 Base64 characters followed by newline * * B64x15 ::= B64 B64 B64 B64 B64 * B64 B64 B64 B64 B64 * B64 B64 B64 B64 B64 * * B64lastline ::= B64x4? B64x4? B64x4? B64x4? * B64x4? B64x4? B64x4? B64x4? * B64x4? B64x4? B64x4? B64x4? * B64x4? B64x4? B64x4? B64x4? * B64x4? B64x4? * (B64x4 | (B64 B64 B16 '=') | (B64 B04 '==')) * #xA * * B64x4 ::= B64 B64 B64 B64 * B04 ::= [AQgw] * B16 ::= [AEIMQUYcgkosw048] */ // number of quadruplets per one line ( must be >1 and <19 ) const unsigned int Base64::quadsPerLine = 15; XMLByte* Base64::encode(const XMLByte* const inputData , const XMLSize_t inputLength , XMLSize_t* outputLength , MemoryManager* const memMgr) { if (!inputData || !outputLength) return 0; int quadrupletCount = ( (int)inputLength + 2 ) / 3; if (quadrupletCount == 0) return 0; // number of rows in encoded stream ( including the last one ) int lineCount = ( quadrupletCount + quadsPerLine-1 ) / quadsPerLine; // // convert the triplet(s) to quadruplet(s) // XMLByte b1, b2, b3, b4; // base64 binary codes ( 0..63 ) XMLSize_t inputIndex = 0; XMLSize_t outputIndex = 0; XMLByte *encodedData = (XMLByte*) getExternalMemory(memMgr, (quadrupletCount*FOURBYTE+lineCount+1) * sizeof(XMLByte)); // // Process all quadruplet(s) except the last // int quad = 1; for (; quad <= quadrupletCount-1; quad++ ) { // read triplet from the input stream split1stOctet( inputData[ inputIndex++ ], b1, b2 ); split2ndOctet( inputData[ inputIndex++ ], b2, b3 ); split3rdOctet( inputData[ inputIndex++ ], b3, b4 ); // write quadruplet to the output stream encodedData[ outputIndex++ ] = base64Alphabet[ b1 ]; encodedData[ outputIndex++ ] = base64Alphabet[ b2 ]; encodedData[ outputIndex++ ] = base64Alphabet[ b3 ]; encodedData[ outputIndex++ ] = base64Alphabet[ b4 ]; if (( quad % quadsPerLine ) == 0 ) encodedData[ outputIndex++ ] = chLF; } // // process the last Quadruplet // // first octet is present always, process it split1stOctet( inputData[ inputIndex++ ], b1, b2 ); encodedData[ outputIndex++ ] = base64Alphabet[ b1 ]; if( inputIndex < inputLength ) { // second octet is present, process it split2ndOctet( inputData[ inputIndex++ ], b2, b3 ); encodedData[ outputIndex++ ] = base64Alphabet[ b2 ]; if( inputIndex < inputLength ) { // third octet present, process it // no PAD e.g. 3cQl split3rdOctet( inputData[ inputIndex++ ], b3, b4 ); encodedData[ outputIndex++ ] = base64Alphabet[ b3 ]; encodedData[ outputIndex++ ] = base64Alphabet[ b4 ]; } else { // third octet not present // one PAD e.g. 3cQ= encodedData[ outputIndex++ ] = base64Alphabet[ b3 ]; encodedData[ outputIndex++ ] = base64Padding; } } else { // second octet not present // two PADs e.g. 3c== encodedData[ outputIndex++ ] = base64Alphabet[ b2 ]; encodedData[ outputIndex++ ] = base64Padding; encodedData[ outputIndex++ ] = base64Padding; } // write out end of the last line encodedData[ outputIndex++ ] = chLF; // write out end of string encodedData[ outputIndex ] = 0; *outputLength = outputIndex; return encodedData; } // // delete the buffer allocated by decode() if // decoding is successfully done. // // In previous version, we use XMLString::strLen(decodedData) // to get the length, this will fail for test case containing // consequtive "A", such "AAFF", or "ab56AA56". Instead of // returning 3/6, we have 0 and 3, indicating that "AA", after // decoded, is interpreted as by the strLen(). // // Since decode() has track of length of the decoded data, we // will get this length from decode(), instead of strLen(). // int Base64::getDataLength(const XMLCh* const inputData , MemoryManager* const manager , Conformance conform ) { XMLSize_t retLen = 0; XMLByte* decodedData = decodeToXMLByte(inputData, &retLen, manager, conform); if ( !decodedData ) return -1; else { returnExternalMemory(manager, decodedData); return (int)retLen; } } XMLByte* Base64::decode(const XMLByte* const inputData , XMLSize_t* decodedLength , MemoryManager* const memMgr , Conformance conform ) { XMLByte* canRepInByte = 0; XMLByte* retStr = decode( inputData , decodedLength , canRepInByte , memMgr , conform); /*** * Release the canRepData */ if (retStr) returnExternalMemory(memMgr, canRepInByte); return retStr; } XMLByte* Base64::decodeToXMLByte(const XMLCh* const inputData , XMLSize_t* decodedLen , MemoryManager* const memMgr , Conformance conform ) { if (!inputData || !*inputData) return 0; /*** * Move input data to a XMLByte buffer */ XMLSize_t srcLen = XMLString::stringLen(inputData); XMLByte *dataInByte = (XMLByte*) getExternalMemory(memMgr, (srcLen+1) * sizeof(XMLByte)); ArrayJanitor janFill(dataInByte, memMgr ? memMgr : XMLPlatformUtils::fgMemoryManager); for (XMLSize_t i = 0; i < srcLen; i++) dataInByte[i] = (XMLByte)inputData[i]; dataInByte[srcLen] = 0; /*** * Forward to the actual decoding method to do the decoding */ *decodedLen = 0; return decode(dataInByte, decodedLen, memMgr, conform); } /*** * E2-54 * * Canonical-base64Binary ::= (B64 B64 B64 B64)*((B64 B64 B16 '=')|(B64 B04 '=='))? * B04 ::= [AQgw] * B16 ::= [AEIMQUYcgkosw048] * B64 ::= [A-Za-z0-9+/] * ***/ XMLCh* Base64::getCanonicalRepresentation(const XMLCh* const inputData , MemoryManager* const memMgr , Conformance conform) { if (!inputData || !*inputData) return 0; /*** * Move input data to a XMLByte buffer */ XMLSize_t srcLen = XMLString::stringLen(inputData); XMLByte *dataInByte = (XMLByte*) getExternalMemory(memMgr, (srcLen+1) * sizeof(XMLByte)); ArrayJanitor janFill(dataInByte, memMgr ? memMgr : XMLPlatformUtils::fgMemoryManager); for (XMLSize_t i = 0; i < srcLen; i++) dataInByte[i] = (XMLByte)inputData[i]; dataInByte[srcLen] = 0; /*** * Forward to the actual decoding method to do the decoding */ XMLSize_t decodedLength = 0; XMLByte* canRepInByte = 0; XMLByte* retStr = decode( dataInByte , &decodedLength , canRepInByte , memMgr , conform); if (!retStr) return 0; /*** * Move canonical representation to a XMLCh buffer to return */ XMLSize_t canRepLen = XMLString::stringLen((char*)canRepInByte); XMLCh *canRepData = (XMLCh*) getExternalMemory(memMgr, (canRepLen + 1) * sizeof(XMLCh)); for (XMLSize_t j = 0; j < canRepLen; j++) canRepData[j] = (XMLCh)canRepInByte[j]; canRepData[canRepLen] = 0; /*** * Release the memory allocated in the actual decoding method */ returnExternalMemory(memMgr, retStr); returnExternalMemory(memMgr, canRepInByte); return canRepData; } // ----------------------------------------------------------------------- // Helper methods // ----------------------------------------------------------------------- // // return 0(null) if invalid data found. // return the buffer containning decoded data otherwise // the caller is responsible for the de-allocation of the // buffer returned. // // temporary data, rawInputData, is ALWAYS released by this function. // /*** * E2-9 * * Base64Binary ::= S? B64quartet* B64final? * * B64quartet ::= B64 S? B64 S? B64 S? B64 S? * * B64final ::= B64 S? B04 S? '=' S? '=' S? * | B64 S? B64 S? B16 S? '=' S? * * B04 ::= [AQgw] * B16 ::= [AEIMQUYcgkosw048] * B64 ::= [A-Za-z0-9+/] * * * E2-54 * * Base64Binary ::= ((B64S B64S B64S B64S)* * ((B64S B64S B64S B64) | * (B64S B64S B16S '=') | * (B64S B04S '=' #x20? '=')))? * * B64S ::= B64 #x20? * B16S ::= B16 #x20? * B04S ::= B04 #x20? * * * Note that this grammar requires the number of non-whitespace characters * in the lexical form to be a multiple of four, and for equals signs to * appear only at the end of the lexical form; strings which do not meet these * constraints are not legal lexical forms of base64Binary because they * cannot successfully be decoded by base64 decoders. * * Note: * The above definition of the lexical space is more restrictive than that given * in [RFC 2045] as regards whitespace -- this is not an issue in practice. Any * string compatible with the RFC can occur in an element or attribute validated * by this type, because the whiteSpace facet of this type is fixed to collapse, * which means that all leading and trailing whitespace will be stripped, and all * internal whitespace collapsed to single space characters, before the above grammar * is enforced. * */ XMLByte* Base64::decode ( const XMLByte* const inputData , XMLSize_t* decodedLength , XMLByte*& canRepData , MemoryManager* const memMgr , Conformance conform ) { if ((!inputData) || (!*inputData)) return 0; // // remove all XML whitespaces from the base64Data // XMLSize_t inputLength = XMLString::stringLen( (const char*)inputData ); XMLByte* rawInputData = (XMLByte*) getExternalMemory(memMgr, (inputLength+1) * sizeof(XMLByte)); ArrayJanitor jan(rawInputData, memMgr ? memMgr : XMLPlatformUtils::fgMemoryManager); XMLSize_t inputIndex = 0; XMLSize_t rawInputLength = 0; bool inWhiteSpace = false; switch (conform) { case Conf_RFC2045: while ( inputIndex < inputLength ) { if (!XMLChar1_0::isWhitespace(inputData[inputIndex])) { rawInputData[ rawInputLength++ ] = inputData[ inputIndex ]; } // RFC2045 does not explicitly forbid more than ONE whitespace // before, in between, or after base64 octects. // Besides, S? allows more than ONE whitespace as specified in the production // [3] S ::= (#x20 | #x9 | #xD | #xA)+ // therefore we do not detect multiple ws inputIndex++; } break; case Conf_Schema: // no leading #x20 if (chSpace == inputData[inputIndex]) return 0; while ( inputIndex < inputLength ) { if (chSpace != inputData[inputIndex]) { rawInputData[ rawInputLength++ ] = inputData[ inputIndex ]; inWhiteSpace = false; } else { if (inWhiteSpace) return 0; // more than 1 #x20 encountered else inWhiteSpace = true; } inputIndex++; } // no trailing #x20 if (inWhiteSpace) return 0; break; default: break; } //now rawInputData contains canonical representation //if the data is valid Base64 rawInputData[ rawInputLength ] = 0; // the length of raw data should be divisible by four if (( rawInputLength % FOURBYTE ) != 0 ) return 0; int quadrupletCount = (int)rawInputLength / FOURBYTE; if ( quadrupletCount == 0 ) return 0; // // convert the quadruplet(s) to triplet(s) // XMLByte d1, d2, d3, d4; // base64 characters XMLByte b1, b2, b3, b4; // base64 binary codes ( 0..64 ) XMLSize_t rawInputIndex = 0; XMLSize_t outputIndex = 0; XMLByte *decodedData = (XMLByte*) getExternalMemory(memMgr, (quadrupletCount*3+1) * sizeof(XMLByte)); // // Process all quadruplet(s) except the last // int quad = 1; for (; quad <= quadrupletCount-1; quad++ ) { // read quadruplet from the input stream if (!isData( (d1 = rawInputData[ rawInputIndex++ ]) ) || !isData( (d2 = rawInputData[ rawInputIndex++ ]) ) || !isData( (d3 = rawInputData[ rawInputIndex++ ]) ) || !isData( (d4 = rawInputData[ rawInputIndex++ ]) )) { // if found "no data" just return NULL returnExternalMemory(memMgr, decodedData); return 0; } b1 = base64Inverse[ d1 ]; b2 = base64Inverse[ d2 ]; b3 = base64Inverse[ d3 ]; b4 = base64Inverse[ d4 ]; // write triplet to the output stream decodedData[ outputIndex++ ] = set1stOctet(b1, b2); decodedData[ outputIndex++ ] = set2ndOctet(b2, b3); decodedData[ outputIndex++ ] = set3rdOctet(b3, b4); } // // process the last Quadruplet // // first two octets are present always, process them if (!isData( (d1 = rawInputData[ rawInputIndex++ ]) ) || !isData( (d2 = rawInputData[ rawInputIndex++ ]) )) { // if found "no data" just return NULL returnExternalMemory(memMgr, decodedData); return 0; } b1 = base64Inverse[ d1 ]; b2 = base64Inverse[ d2 ]; // try to process last two octets d3 = rawInputData[ rawInputIndex++ ]; d4 = rawInputData[ rawInputIndex++ ]; if (!isData( d3 ) || !isData( d4 )) { // check if last two are PAD characters if (isPad( d3 ) && isPad( d4 )) { // two PAD e.g. 3c== if ((b2 & 0xf) != 0) // last 4 bits should be zero { returnExternalMemory(memMgr, decodedData); return 0; } decodedData[ outputIndex++ ] = set1stOctet(b1, b2); } else if (!isPad( d3 ) && isPad( d4 )) { // one PAD e.g. 3cQ= b3 = base64Inverse[ d3 ]; if (( b3 & 0x3 ) != 0 ) // last 2 bits should be zero { returnExternalMemory(memMgr, decodedData); return 0; } decodedData[ outputIndex++ ] = set1stOctet( b1, b2 ); decodedData[ outputIndex++ ] = set2ndOctet( b2, b3 ); } else { // an error like "3c[Pad]r", "3cdX", "3cXd", "3cXX" where X is non data returnExternalMemory(memMgr, decodedData); return 0; } } else { // no PAD e.g 3cQl b3 = base64Inverse[ d3 ]; b4 = base64Inverse[ d4 ]; decodedData[ outputIndex++ ] = set1stOctet( b1, b2 ); decodedData[ outputIndex++ ] = set2ndOctet( b2, b3 ); decodedData[ outputIndex++ ] = set3rdOctet( b3, b4 ); } // write out the end of string decodedData[ outputIndex ] = 0; *decodedLength = outputIndex; //allow the caller to have access to the canonical representation jan.release(); canRepData = rawInputData; return decodedData; } bool Base64::isData(const XMLByte& octet) { return (base64Inverse[octet]!=(XMLByte)-1); } XERCES_CPP_NAMESPACE_END