24 #include "kmime_util.h" 25 #include "kmime_util_p.h" 29 #include "kmime_header_parsing.h" 30 #include "kmime_message.h" 31 #include "kmime_warning.h" 33 #include <config-kmime.h> 37 #include <klocalizedstring.h> 38 #include <kcharsets.h> 42 #include <QtCore/QList> 43 #include <QtCore/QString> 44 #include <QtCore/QTextCodec> 51 using namespace KMime;
// Charset names seen by cachedCharset(); new entries are stored upper-cased.
55 QList<QByteArray> c_harsetCache;
// Language names seen by cachedLanguage(); new entries are stored upper-cased.
56 QList<QByteArray> l_anguageCache;
// Fallback character encoding: written by setFallbackCharEncoding() and consulted by
// decodeRFC2047String() when the UTF-8 interpretation of the result contains U+FFFD.
57 QString f_allbackCharEnc;
// Flag written by setUseOutlookAttachmentEncoding() and read back by
// useOutlookAttachmentEncoding(); starts out false.
58 bool u_seOutlookEncoding =
false;
// Looks up name in c_harsetCache with a case-insensitive comparison (qstricmp).
// If execution reaches the end, the upper-cased name is appended to the cache and
// the freshly stored entry is returned.
// NOTE(review): the statement executed on a cache hit is not visible in this
// listing; presumably it returns the cached entry -- confirm against the full file.
60 QByteArray cachedCharset(
const QByteArray &name )
62 foreach (
const QByteArray& charset, c_harsetCache ) {
63 if ( qstricmp( name.data(), charset.data() ) == 0 ) {
68 c_harsetCache.append( name.toUpper() );
70 return c_harsetCache.last();
// Looks up name in l_anguageCache with a case-insensitive comparison (qstricmp).
// If execution reaches the end, the upper-cased name is appended to the cache and
// the freshly stored entry is returned.
// NOTE(review): the statement executed on a cache hit is not visible in this
// listing; presumably it returns the cached entry -- confirm against the full file.
73 QByteArray cachedLanguage(
const QByteArray &name )
75 foreach (
const QByteArray& language, l_anguageCache ) {
76 if ( qstricmp( name.data(), language.data() ) == 0 ) {
81 l_anguageCache.append( name.toUpper() );
83 return l_anguageCache.last();
86 bool isUsAscii(
const QString &s )
88 uint sLength = s.length();
89 for ( uint i=0; i<sLength; i++ ) {
90 if ( s.at( i ).toLatin1() <= 0 ) {
100 case Headers::CE7Bit:
return QString::fromLatin1(
"7bit" );
101 case Headers::CE8Bit:
return QString::fromLatin1(
"8bit" );
102 case Headers::CEquPr:
return QString::fromLatin1(
"quoted-printable" );
103 case Headers::CEbase64:
return QString::fromLatin1(
"base64" );
104 case Headers::CEuuenc:
return QString::fromLatin1(
"uuencode" );
105 case Headers::CEbinary:
return QString::fromLatin1(
"binary" );
106 default:
return QString::fromLatin1(
"unknown" );
// Builds the list of content-transfer-encodings considered usable for data; the
// order in which entries are appended is the order of the returned list.
// NOTE(review): the case labels of the switch and the construction of cf are not
// visible in this listing, so which branch belongs to which cf.type() value must
// be confirmed against the full file.
110 QList<Headers::contentEncoding> encodingsForData(
const QByteArray &data )
112 QList<Headers::contentEncoding> allowed;
115 switch ( cf.type() ) {
117 allowed << Headers::CE7Bit;
119 allowed << Headers::CE8Bit;
// With a printable ratio above 5/6, quoted-printable is listed before base64;
// otherwise base64 is listed first.
121 if ( cf.printableRatio() > 5.0/6.0 ) {
125 allowed << Headers::CEquPr;
126 allowed << Headers::CEbase64;
128 allowed << Headers::CEbase64;
129 allowed << Headers::CEquPr;
133 allowed << Headers::CEbase64;
144 const uchar specialsMap[16] = {
145 0x00, 0x00, 0x00, 0x00,
146 0x20, 0xCA, 0x00, 0x3A,
147 0x80, 0x00, 0x00, 0x1C,
148 0x00, 0x00, 0x00, 0x00
152 const uchar tSpecialsMap[16] = {
153 0x00, 0x00, 0x00, 0x00,
154 0x20, 0xC9, 0x00, 0x3F,
155 0x80, 0x00, 0x00, 0x1C,
156 0x00, 0x00, 0x00, 0x00
160 const uchar aTextMap[16] = {
161 0x00, 0x00, 0x00, 0x00,
162 0x5F, 0x35, 0xFF, 0xC5,
163 0x7F, 0xFF, 0xFF, 0xE3,
164 0xFF, 0xFF, 0xFF, 0xFE
168 const uchar tTextMap[16] = {
169 0x00, 0x00, 0x00, 0x00,
170 0x5F, 0x36, 0xFF, 0xC0,
171 0x7F, 0xFF, 0xFF, 0xE3,
172 0xFF, 0xFF, 0xFF, 0xFE
176 const uchar eTextMap[16] = {
177 0x00, 0x00, 0x00, 0x00,
178 0x40, 0x35, 0xFF, 0xC0,
179 0x7F, 0xFF, 0xFF, 0xE0,
180 0x7F, 0xFF, 0xFF, 0xE0
183 void setFallbackCharEncoding(
const QString& fallbackCharEnc)
185 f_allbackCharEnc = fallbackCharEnc;
188 QString fallbackCharEncoding()
190 return f_allbackCharEnc;
193 void setUseOutlookAttachmentEncoding(
bool violateStandard )
195 u_seOutlookEncoding = violateStandard;
198 bool useOutlookAttachmentEncoding()
200 return u_seOutlookEncoding;
// Decodes the RFC 2047 encoded words contained in src.
// usedCS, defaultCS and forceCS are handed through to
// HeaderParsing::parseEncodedWord().
// NOTE(review): several lines of this function (declarations of result, decoded
// and language, the else branches and the final return) are not visible in this
// listing; the comments below cover the visible statements only.
204 QString decodeRFC2047String(
const QByteArray &src, QByteArray &usedCS,
205 const QByteArray &defaultCS,
bool forceCS )
// Whitespace seen directly after an encoded word is collected here instead of
// being appended to the result right away.
209 QByteArray spaceBuffer;
210 spaceBuffer.reserve(64);
211 const char *scursor = src.constData();
212 const char *send = scursor + src.length();
213 bool onlySpacesSinceLastWord =
false;
215 while ( scursor != send ) {
217 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
218 spaceBuffer += *scursor++;
// A '=' may start an encoded word; start remembers the position before parsing.
223 if ( *scursor ==
'=' ) {
227 const char *start = scursor;
// On success the decoded text is appended to result as UTF-8 bytes.
228 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
229 result += decoded.toUtf8();
230 onlySpacesSinceLastWord =
true;
// The buffered whitespace is emitted and the flag is cleared.
233 if ( onlySpacesSinceLastWord ) {
234 result += spaceBuffer;
235 onlySpacesSinceLastWord =
false;
243 if ( onlySpacesSinceLastWord ) {
244 result += spaceBuffer;
245 onlySpacesSinceLastWord =
false;
// If the collected bytes do not form clean UTF-8 (U+FFFD shows up) and a fallback
// encoding is configured, the bytes are decoded with the fallback codec instead.
253 const QString tryUtf8 = QString::fromUtf8( result );
254 if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
255 QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
256 return codec->toUnicode( result );
262 QString decodeRFC2047String(
const QByteArray &src )
265 return decodeRFC2047String( src, usedCS,
"utf-8",
false );
// Characters that force encoding of a word in address headers (checked with
// strchr() by the RFC 2047 encoders in this file). Declared as a const array
// rather than a reassignable pointer so the table cannot be repointed by accident.
static const char reservedCharacters[] = "\"()<>@,.;:\\[]=";
// Encodes src as an RFC 2047 header value using charset (or a fallback codec).
// NOTE(review): many lines (declarations of result, usedCS, start, end, c,
// hexcode, several branches and the final return) are not visible in this
// listing; the comments below cover the visible statements only.
270 QByteArray encodeRFC2047String(
const QString &src,
const QByteArray &charset,
271 bool addressHeader,
bool allow8BitHeaders )
275 bool nonAscii=
false, ok=
true, useQEncoding=
false;
278 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
// Alternative codec lookup based on the locale's encoding.
283 usedCS = KGlobal::locale()->encoding();
284 codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
287 if ( charset.isEmpty() ) {
288 usedCS = codec->name();
// Convert to bytes; if the codec reports characters it cannot represent, a codec
// is looked up again by usedCS and the conversion is redone.
294 QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader );
295 QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState );
296 if ( converterState.invalidChars > 0 ) {
298 codec = QTextCodec::codecForName( usedCS );
299 encoded8Bit = codec->fromUnicode( src );
302 if ( usedCS.contains(
"8859-" ) ) {
306 if ( allow8BitHeaders ) {
// Scan for bytes that need encoding: 8-bit bytes, ESC, and (for address headers)
// the reserved characters.
// NOTE(review): this literal duplicates reservedCharacters, which the second scan
// below uses directly.
310 uint encoded8BitLength = encoded8Bit.length();
311 for (
unsigned int i=0; i<encoded8BitLength; i++ ) {
312 if ( encoded8Bit[i] ==
' ' ) {
317 if ( ( (
signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] ==
'\033' ) ||
318 ( addressHeader && ( strchr(
"\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
// Advance end to the next space (or the end of the data).
326 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
331 for (
int x=end; x<encoded8Bit.length(); x++ ) {
332 if ( ( (
signed char)encoded8Bit[x] < 0 ) || ( encoded8Bit[x] ==
'\033' ) ||
333 ( addressHeader && ( strchr( reservedCharacters, encoded8Bit[x] ) != 0 ) ) ) {
336 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
// Everything before start is copied verbatim, followed by the "=?charset" prefix.
343 result = encoded8Bit.left( start ) +
"=?" + usedCS;
345 if ( useQEncoding ) {
349 for (
int i=start; i<end; i++ ) {
// Letters and digits are tested for here; other bytes get the hex digits computed below.
354 if ( ( ( c >=
'a' ) && ( c <=
'z' ) ) ||
355 ( ( c >=
'A' ) && ( c <=
'Z' ) ) ||
356 ( ( c >=
'0' ) && ( c <=
'9' ) ) ) {
// 48 is '0'; values of 58 and above fall past '9' and are adjusted in lines not shown here.
360 hexcode = ( ( c & 0xF0 ) >> 4 ) + 48;
361 if ( hexcode >= 58 ) {
365 hexcode = ( c & 0x0F ) + 48;
366 if ( hexcode >= 58 ) {
// Base64 variant: the range [start, end) is encoded as a single "?B?" payload.
374 result +=
"?B?" + encoded8Bit.mid( start, end - start ).toBase64();
// The part after end is copied verbatim.
378 result += encoded8Bit.right( encoded8Bit.length() - end );
// Alternative path: the converted bytes are used unchanged.
380 result = encoded8Bit;
// Encodes src piecewise: runs of characters between reserved ASCII characters
// are passed to encodeRFC2047String(), while the reserved characters themselves
// are appended to the result as-is.
// NOTE(review): the declarations of result, pos and wordStart, the loop
// increments and the final return are not visible in this listing. No use of
// splitChars is visible after it is filled.
386 QByteArray encodeRFC2047Sentence(
const QString& src,
const QByteArray& charset )
389 QList<QChar> splitChars;
390 splitChars << QLatin1Char(
',' ) << QLatin1Char(
'\"' ) << QLatin1Char(
';' ) << QLatin1Char(
'\\' );
391 const QChar *ch = src.constData();
392 const int length = src.length();
399 while ( pos < length ) {
401 const bool isAscii = ch->unicode() < 127;
402 const bool isReserved = ( strchr( reservedCharacters, ch->toLatin1() ) != 0 );
// A reserved ASCII character ends the current word: encode the word (if any),
// then emit the reserved character literally.
403 if ( isAscii && isReserved ) {
404 const int wordSize = pos - wordStart;
405 if ( wordSize > 0 ) {
406 const QString word = src.mid( wordStart, wordSize );
407 result += encodeRFC2047String( word, charset );
410 result += ch->toLatin1();
// Encode whatever is left after the last reserved character.
418 const int wordSize = pos - wordStart;
419 if ( wordSize > 0 ) {
420 const QString word = src.mid( wordStart, pos - wordStart );
421 result += encodeRFC2047String( word, charset );
// Encodes str in the RFC 2231 extended parameter form: charset, two single
// quotes, then the data with selected bytes quoted.
// NOTE(review): the early returns, the declarations of latin and l, the bodies
// of several branches and the final return are not visible in this listing.
430 QByteArray encodeRFC2231String(
const QString& str,
const QByteArray& charset )
432 if ( str.isEmpty() ) {
436 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
// Convert to bytes: Latin-1 for "us-ascii", otherwise via the codec, and with the
// local 8-bit encoding when no codec is available.
438 if ( charset ==
"us-ascii" ) {
439 latin = str.toLatin1();
440 }
else if ( codec ) {
441 latin = codec->fromUnicode( str );
443 latin = str.toLocal8Bit();
// First pass: look for bytes below 0x20 or with the high bit set.
447 for ( l = latin.data(); *l; ++l ) {
448 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) {
457 QByteArray result = charset +
"''";
// Second pass: 8-bit bytes, '%' and the characters listed in especials need quoting.
458 for ( l = latin.data(); *l; ++l ) {
459 bool needsQuoting = ( *l & 0x80 ) || ( *l ==
'%' );
460 if ( !needsQuoting ) {
461 const QByteArray especials =
"()<>@,;:\"/[]?.= \033";
462 int len = especials.length();
463 for (
int i = 0; i < len; i++ ) {
464 if ( *l == especials[i] ) {
470 if ( needsQuoting ) {
// 48 is '0'; values of 58 and above fall past '9' and are adjusted in lines not shown here.
472 unsigned char hexcode;
473 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
474 if ( hexcode >= 58 ) {
478 hexcode = ( *l & 0x0F ) + 48;
479 if ( hexcode >= 58 ) {
// Decodes an RFC 2231 extended parameter value (charset'language'data with
// percent-escaped bytes) into a QString and reports the codec used in usedCS.
// NOTE(review): the last parameter (the code uses a name forceCS), the
// declarations of ch and ch2, the hex-digit range adjustments and several
// branch heads are not visible in this listing.
492 QString decodeRFC2231String(
const QByteArray &str, QByteArray &usedCS,
const QByteArray &defaultCS,
495 int p = str.indexOf(
'\'' );
// Presumably taken when no single quote was found: decode the whole value with
// the default charset -- confirm the guarding condition in the full file.
497 return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) )->toUnicode( str );
// Everything before the first quote is the charset; the payload starts after the
// last quote.
501 QByteArray charset = str.left( p );
503 QByteArray st = str.mid( str.lastIndexOf(
'\'' ) + 1 );
// Percent-decoding in place: 37 is '%', 48 is '0'.
507 while ( p < (
int)st.length() ) {
508 if ( st.at( p ) == 37 ) {
511 if ( p + 2 < st.length() ) {
512 ch = st.at( p + 1 ) - 48;
516 ch2 = st.at( p + 2 ) - 48;
520 st[p] = ch * 16 + ch2;
521 st.remove( p + 1, 2 );
526 kDebug() <<
"Got pre-decoded:" << st;
// Use the embedded charset unless it is unknown or the default is forced.
528 const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
529 if ( !charsetcodec || forceCS ) {
530 charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
533 usedCS = charsetcodec->name();
534 return charsetcodec->toUnicode( st );
537 QString decodeRFC2231String(
const QByteArray &src )
540 return decodeRFC2231String( src, usedCS,
"utf-8",
false );
// Builds a pseudo-unique byte string from the current time, the process id and
// rand().
// NOTE(review): the declarations of now, ran, pos and ret, the loop body and the
// way the pieces are joined are not visible in this listing.
543 QByteArray uniqueString()
// 61 characters: the lower-case run has no 'w', which matches the 61.0 factor
// used for pos below.
545 static char chars[] =
"0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
549 unsigned int timeval;
// ran is a random divisor in 1..1000; timeval mixes time, divisor and pid.
553 ran = 1 + (int)( 1000.0 * rand() / ( RAND_MAX + 1.0 ) );
554 timeval = ( now / ran ) + getpid();
// Ten iterations, each computing a random index 0..60.
556 for (
int i = 0; i < 10; i++ ) {
557 pos = (int) ( 61.0 * rand() / ( RAND_MAX + 1.0 ) );
563 ret.setNum( timeval );
570 QByteArray multiPartBoundary()
572 return "nextPart" + uniqueString();
// Unfolds a folded header value: for every '\n' in header, the whitespace around
// the line break is located and the text before it is copied to the result.
// NOTE(review): the declaration of result, the loop-internal updates of
// foldBegin, foldEnd and pos, what is inserted at a fold, and the final return
// are not visible in this listing.
575 QByteArray unfoldHeader(
const QByteArray &header )
578 if ( header.isEmpty() ) {
582 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
583 while ( ( foldMid = header.indexOf(
'\n', pos ) ) >= 0 ) {
584 foldBegin = foldEnd = foldMid;
// Walk backwards over whitespace that precedes the line break.
586 while ( foldBegin > 0 ) {
587 if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) {
// Walk forwards over whitespace that follows the line break; a continuation line
// starting with "=09" or "=20" directly after '\n' is recognised as well.
593 while ( foldEnd <= header.length() - 1 ) {
594 if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) {
596 }
else if ( foldEnd > 0 && header[foldEnd - 1] ==
'\n' &&
597 header[foldEnd] ==
'=' && foldEnd + 2 < header.length() &&
598 ( ( header[foldEnd + 1] ==
'0' &&
599 header[foldEnd + 2] ==
'9' ) ||
600 ( header[foldEnd + 1] ==
'2' &&
601 header[foldEnd + 2] ==
'0' ) ) ) {
610 result += header.mid( pos, foldBegin - pos );
611 if ( foldEnd < header.length() - 1 ) {
// Copy the remainder after the last line break.
616 const int len = header.length();
618 result += header.mid( pos, len - pos );
// Searches src, starting at dataBegin, for the end of a (possibly folded) header
// line.
// NOTE(review): the declaration of end, the loop structure, every assignment to
// *folded and all return statements are not visible in this listing.
623 int findHeaderLineEnd(
const QByteArray &src,
int &dataBegin,
bool *folded )
// len is the index of the last byte, not the byte count.
626 int len = src.length() - 1;
632 if ( dataBegin < 0 ) {
637 if ( dataBegin > len ) {
// A '\n' directly followed by a space or tab.
645 if ( src.at( end ) ==
'\n' && end + 1 < len &&
646 ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ) ) {
653 if ( src.at( end ) !=
'\n' ) {
655 end = src.indexOf(
'\n', end + 1 );
656 if ( end == -1 || end == len ) {
// The line after this '\n' starts with a space, a tab, "=09" or "=20".
659 }
else if ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ||
660 ( src[end + 1] ==
'=' && end + 3 <= len &&
661 ( ( src[end + 2] ==
'0' && src[end + 3] ==
'9' ) ||
662 ( src[end + 2] ==
'2' && src[end + 3] ==
'0' ) ) ) ) {
// Locates the header called name inside src; dataBegin and end are set for the
// header that was found, and folded is passed on to findHeaderLineEnd().
// NOTE(review): the construction of n, the declaration of begin, the not-found
// handling and the return statements are not visible in this listing.
680 int indexOfHeader(
const QByteArray &src,
const QByteArray &name,
int &end,
int &dataBegin,
bool *folded )
// Case-insensitive check whether src itself starts with n.
686 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
// Otherwise a case-insensitive search through the whole buffer.
690 const char *p = strcasestr( src.constData(), n.constData() );
694 begin = p - src.constData();
// The value starts after the header name plus one more character.
700 dataBegin = begin + name.length() + 1;
// NOTE(review): no bounds check on dataBegin is visible before this access.
702 if ( src.at( dataBegin ) ==
' ' ) {
705 end = findHeaderLineEnd( src, dataBegin, folded );
// Extracts the value of the first header called name from src, either as the raw
// slice [begin, end) or passed through unfoldHeader().
// NOTE(review): the declarations of result, begin, end and folded, the condition
// choosing between the two forms (presumably the folded flag) and the return
// statements are not visible in this listing.
715 QByteArray extractHeader(
const QByteArray &src,
const QByteArray &name )
721 if ( src.isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) {
727 result = src.mid( begin, end - begin );
730 QByteArray hdrValue = src.mid( begin, end - begin );
731 result = unfoldHeader( hdrValue );
// Collects the values of all headers called name from src. After each hit, the
// working copy is cut down to the part following the hit and searched again.
// NOTE(review): the declarations of begin, end and folded, the condition choosing
// between raw and unfolded values and the return statements are not visible in
// this listing.
738 QList<QByteArray> extractHeaders(
const QByteArray &src,
const QByteArray &name )
742 QList<QByteArray> result;
743 QByteArray copySrc( src );
745 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
749 while ( begin >= 0 ) {
751 result.append( copySrc.mid( begin, end - begin ) );
753 QByteArray hdrValue = copySrc.mid( begin, end - begin );
754 result.append( unfoldHeader( hdrValue ) );
// Continue searching behind the header just handled.
758 copySrc = copySrc.mid( end );
759 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
// Removes the header called name from header, in place: the bytes from the index
// returned by indexOfHeader() up to and including position end are cut out.
// NOTE(review): any guard for the not-found case is not visible in this listing.
766 void removeHeader( QByteArray &header,
const QByteArray &name )
768 int begin, end, dummy;
769 begin = indexOfHeader( header, name, end, dummy );
771 header.remove( begin, end - begin + 1 );
775 QByteArray CRLFtoLF(
const QByteArray &s )
778 ret.replace(
"\r\n",
"\n" );
782 QByteArray CRLFtoLF(
const char *s )
785 return CRLFtoLF( ret );
788 QByteArray LFtoCRLF(
const QByteArray &s )
791 ret.replace(
'\n',
"\r\n" );
795 QByteArray LFtoCRLF(
const char *s )
798 return LFtoCRLF( ret );
// Shared implementation behind the removeQuots() overloads; StringType is the
// string class and CharType the type used to build comparison characters.
// The loop tracks whether the current position is inside a double-quoted section
// and gives special treatment to backslashes found there.
// NOTE(review): the statements executed for a quote or a backslash are not
// visible in this listing.
802 template <
typename StringType,
typename CharType >
void removeQuotesGeneric( StringType & str )
804 bool inQuote =
false;
805 for (
int i = 0; i < str.length(); ++i ) {
806 if ( str[i] == CharType(
'"' ) ) {
811 if ( inQuote && ( str[i] == CharType(
'\\' ) ) ) {
819 void removeQuots( QByteArray &str )
821 removeQuotesGeneric<QByteArray, char>( str );
824 void removeQuots( QString &str )
826 removeQuotesGeneric<QString, QLatin1Char>( str );
// Shared implementation behind the addQuotes() overloads. Backslashes and double
// quotes inside str get a backslash inserted in front of them, and the string is
// wrapped in double quotes when that is needed or forced.
// NOTE(review): the statement that sets needsQuotes and the index adjustment
// after an insert are not visible in this listing.
829 template<
class StringType,
class CharType,
class CharConverterType,
class StringConverterType,
class ToString>
830 void addQuotes_impl( StringType &str,
bool forceQuotes )
832 bool needsQuotes=
false;
833 for (
int i=0; i < str.length(); i++ ) {
834 const CharType cur = str.at( i );
// NOTE(review): this test looks at the whole string, not at cur, yet sits inside
// the per-character loop; it builds a QString and a QRegExp on every iteration.
835 if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String(
"\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) {
// Escape backslashes and double quotes.
838 if ( cur == CharConverterType(
'\\' ) || cur == CharConverterType(
'\"' ) ) {
839 str.insert( i, CharConverterType(
'\\' ) );
844 if ( needsQuotes || forceQuotes ) {
845 str.insert( 0, CharConverterType(
'\"' ) );
846 str.append( StringConverterType(
"\"" ) );
850 void addQuotes( QByteArray &str,
bool forceQuotes )
852 addQuotes_impl<QByteArray, char, char, char*, QLatin1String>( str, forceQuotes );
855 void addQuotes( QString &str,
bool forceQuotes )
857 addQuotes_impl<QString, QChar, QLatin1Char, QLatin1String, QString>( str, forceQuotes );
// Balances Unicode bidi control characters in input: a PDF without a matching
// opener is removed from the copy, and a PDF is added for every opener that is
// still unclosed at the end.
// NOTE(review): the counter updates inside the loop and the final return are not
// visible in this listing.
860 KMIME_EXPORT QString balanceBidiState(
const QString &input )
// Direction overrides/embeddings (openers) and POP DIRECTIONAL FORMATTING (PDF).
862 const int LRO = 0x202D;
863 const int RLO = 0x202E;
864 const int LRE = 0x202A;
865 const int RLE = 0x202B;
866 const int PDF = 0x202C;
868 QString result = input;
870 int openDirChangers = 0;
871 int numPDFsRemoved = 0;
872 for (
int i = 0; i < input.length(); i++ ) {
873 const ushort &code = input.at( i ).unicode();
874 if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
876 }
else if ( code == PDF ) {
877 if ( openDirChangers > 0 ) {
// Unmatched PDF: warn and drop it. The index is corrected by the number of PDFs
// already removed because result has become shorter than input.
881 kWarning() <<
"Possible Unicode spoofing (unexpected PDF) detected in" << input;
882 result.remove( i - numPDFsRemoved, 1 );
888 if ( openDirChangers > 0 ) {
889 kWarning() <<
"Possible Unicode spoofing detected in" << input;
// One PDF per unclosed opener; if the text ends with a double quote, the PDF is
// placed in front of that quote.
894 for (
int i = openDirChangers; i > 0; i-- ) {
895 if ( result.endsWith( QLatin1Char(
'"' ) ) ) {
896 result.insert( result.length() - 1, QChar( PDF ) );
898 result += QChar( PDF );
906 QString removeBidiControlChars(
const QString &input )
908 const int LRO = 0x202D;
909 const int RLO = 0x202E;
910 const int LRE = 0x202A;
911 const int RLE = 0x202B;
912 QString result = input;
913 result.remove( LRO );
914 result.remove( RLO );
915 result.remove( LRE );
916 result.remove( RLE );
// Tells whether content is a cryptographic MIME part, judged by its content type:
// media type "application" (compared in lower case) together with one of the
// PGP / PKCS#7 subtypes listed below.
// NOTE(review): how contentType is obtained, any early exits, and the extra
// condition attached to the "octet-stream" case are not visible in this listing.
920 static bool isCryptoPart(
Content* content )
932 const QByteArray lowerSubType = contentType->
subType().toLower();
933 return ( contentType->
mediaType().toLower() ==
"application" &&
934 ( lowerSubType ==
"pgp-encrypted" ||
935 lowerSubType ==
"pgp-signature" ||
936 lowerSubType ==
"pkcs7-mime" ||
937 lowerSubType ==
"x-pkcs7-mime" ||
938 lowerSubType ==
"pkcs7-signature" ||
939 lowerSubType ==
"x-pkcs7-signature" ||
940 ( lowerSubType ==
"octet-stream" &&
// Tells whether content, or one of its children (checked recursively), counts as
// an attachment: a part with a non-empty file name that is not a crypto part.
// NOTE(review): the conditions that clear emptyFilename, the iteration over the
// children and all return statements are not visible in this listing.
944 bool hasAttachment(
Content* content )
950 bool emptyFilename =
true;
953 emptyFilename =
false;
956 if ( emptyFilename &&
959 emptyFilename =
false;
963 if ( !emptyFilename && !isCryptoPart( content ) ) {
970 if ( hasAttachment( child ) ) {
// Tells whether content itself is an invitation (see isInvitation()) or whether
// one of its children, checked recursively, is.
// NOTE(review): the iteration over the children and all return statements are
// not visible in this listing.
978 bool hasInvitation(
Content *content )
984 if ( isInvitation(content) ) {
991 if ( hasInvitation( child ) ) {
// Tells whether message looks signed: either its content type has one of the
// signature subtypes below, or it has a main body part of one of the signature
// MIME types.
// NOTE(review): how contentType is obtained, any null checks and all return
// statements are not visible in this listing.
999 bool isSigned(
Message *message )
1006 if ( contentType->
isSubtype(
"signed" ) ||
1007 contentType->
isSubtype(
"pgp-signature" ) ||
1008 contentType->
isSubtype(
"pkcs7-signature" ) ||
1009 contentType->
isSubtype(
"x-pkcs7-signature" ) ||
1011 message->
mainBodyPart(
"application/pgp-signature" ) ||
1012 message->
mainBodyPart(
"application/pkcs7-signature" ) ||
1013 message->
mainBodyPart(
"application/x-pkcs7-signature" ) ) {
// Tells whether message looks encrypted: either its content type has one of the
// encryption subtypes below, or it has a main body part of one of the encryption
// MIME types.
// NOTE(review): how contentType is obtained, any null checks, possibly further
// alternatives of the condition and all return statements are not visible in
// this listing.
1019 bool isEncrypted(
Message *message )
1026 if ( contentType->
isSubtype(
"encrypted" ) ||
1027 contentType->
isSubtype(
"pgp-encrypted" ) ||
1028 contentType->
isSubtype(
"pkcs7-mime" ) ||
1029 contentType->
isSubtype(
"x-pkcs7-mime" ) ||
1031 message->
mainBodyPart(
"application/pgp-encrypted" ) ||
1033 message->
mainBodyPart(
"application/x-pkcs7-mime" ) ) {
// Tells whether content is a calendar invitation, i.e. has a content type with
// media type "text" and subtype "calendar".
// NOTE(review): how contentType is obtained, any check of content itself and the
// return statements are not visible in this listing.
1040 bool isInvitation(
Content *content )
1048 if ( contentType && contentType->
isMediatype(
"text" ) && contentType->
isSubtype(
"calendar" ) ) {
This file is part of the API for handling MIME data and defines the Codec class.
List contents() const
For multipart contents, this will return a list of all multipart child contents.
Content * mainBodyPart(const QByteArray &type=QByteArray())
Returns the first main body part of a given type, taking multipart/mixed and multipart/alternative no...
Headers::ContentDisposition * contentDisposition(bool create=true)
Returns the Content-Disposition header.
This file is part of the API for handling MIME data and defines the CharFreq class.
Headers::ContentType * contentType(bool create=true)
Returns the Content-Type header.
Represents a (email) message.
A class that encapsulates MIME encoded Content.
A class for performing basic data typing using frequency count heuristics.