//************************************************************************************** // Filename: CConverter.cpp // Copyright © 1998 Tomasz Kukielka. All rights reserved. // // Description: // //************************************************************************************** // Revision History: // Sunday, September 13, 1998 - Original //************************************************************************************** #include #include #include "CConverter.h" //#include //#include #include "CUTextFileStream.h" #include "CEncodingArray.h" #include "CyclonePreferences.h" #include "StHandleUnlocker.h" #include "Alert.h" #include "CarbonScrap.h" #include #include "CThrownResult.h" #include "AMacHandle.h" #include "GetThrownOSStatus.h" extern CyclonePrefs *gPrefs; // --------------------------------------------------------------------------------- // ¥ ResizeOutputHandle // // This function increase the size of a locked handle by resizeBy and will // keep update a pointer into the handle if it happens to move after the resize // We'll just this routine in the convert methods when we run out of buffer // space and need to make room for more output // --------------------------------------------------------------------------------- //static void CConverter::ResizeOutputHandle( Handle h, char** outputPtr, ByteCount resizeBy ) { UInt32 oldSize=(UInt32)(*outputPtr) - (UInt32)(*h); StHandleUnlocker unlock(h);//destructor will lock the handle again ::SetHandleSize( h, GetHandleSize( h) + resizeBy); CThrownOSErr err = ::MemError(); *outputPtr=(char*)( (UInt32)(*h) + oldSize); } // --------------------------------------------------------------------------------- // ¥ BufToHex // --------------------------------------------------------------------------------- void BufToHex( const unsigned char* src, unsigned char* dest, ByteCount srcLen, ByteCount &destLen, UInt8 clumpSize); void BufToHex( const unsigned char* src, unsigned char* dest, ByteCount srcLen, ByteCount 
&destLen, UInt8 clumpSize) { unsigned char *hex=(unsigned char*)"0123456789ABCDEF"; UInt8 tempClumpSize = clumpSize; destLen=0; while(srcLen--) { *dest++ = *( hex + ((*src&0xF0)>>4) ); *dest++ = *( hex + ( *src&0x0F ) ); destLen+=2; if( clumpSize != 0 ) { if( --tempClumpSize == 0 ) { *dest++ = ' '; tempClumpSize = clumpSize; destLen++; } } src++; } } LArray* CConverter::sAllTextEncodingsArr = nil; #pragma mark - #pragma mark == begin old routines == /* OSStatus CConverter::DoConvertToUnicode( TextEncoding unicodeEncoding, TextEncoding fromEncoding, Handle srcH, Handle destH, ByteCount &inputRead, ByteCount &unicodeLen) { OSStatus status; TextToUnicodeInfo textToUnicodeInfo; UnicodeMapping theMapping; ByteCount origOutputLen; ByteCount maxOutput = ::GetHandleSize(destH); ByteCount srcLen = ::GetHandleSize(srcH); inputRead=0; unicodeLen=0; // Resize our output buffer if necessary to the minimum buffer we recommended earlier if( maxOutput < kMinimumBufferSize ) { maxOutput=kMinimumBufferSize; StHandleUnlocker unlock(destH);//destructor will lock the handle again ::SetHandleSize(destH, maxOutput); } origOutputLen=maxOutput; // Create a unicodeMapping holding the specified source encoding and the target unicode encoding theMapping.unicodeEncoding = unicodeEncoding; theMapping.otherEncoding = fromEncoding; theMapping.mappingVersion = kUnicodeUseLatestMapping; // Create a conversion context from the specified mapping status=::CreateTextToUnicodeInfo( &theMapping, &textToUnicodeInfo); if( status != noErr) return status; OptionBits convOptions = ( kUnicodeUseFallbacksMask | kUnicodeKeepInfoMask ) ; StHandleLocker theSLock(srcH); StHandleLocker theDLock(destH); char *src=*srcH; char *dest= *destH; // Call ConvertFromTextToUnicode as many times as necessary to convert the whole input stream to unicode. 
// The output handle will get resized if necessary to accomodate the conversion of the whole input stream do{ ByteCount tSourceRead, tUnicodeLen; status = ::ConvertFromTextToUnicode(textToUnicodeInfo, srcLen, (ConstLogicalAddress) src, convOptions, 0, nil, nil, nil, maxOutput, &tSourceRead, &tUnicodeLen, (UniCharArrayPtr)dest); //check to see if we need to adjust our output buffer size. See discussion of this in ConvertFromMulti method. if( (status == kTECOutputBufferFullStatus) || (status == kTECBufferBelowMinimumSizeErr) ) { ResizeOutputHandle( destH, &dest, origOutputLen ); dest=(char*)( (UInt32)dest + tUnicodeLen); maxOutput=origOutputLen+(maxOutput-tUnicodeLen); src=(char*)( (UInt32)src + tSourceRead); srcLen-=tSourceRead; } inputRead+=tSourceRead; unicodeLen+=tUnicodeLen; } while( (status == kTECOutputBufferFullStatus) || (status == kTECBufferBelowMinimumSizeErr) ); ::DisposeTextToUnicodeInfo( &textToUnicodeInfo ); if(status == noErr) { Size newHSize = ::GetHandleSize(destH); if( unicodeLen < newHSize )//should be equal, but not always - so in this case clip the trailing trash { theDLock.Release(); ::SetHandleSize(destH, unicodeLen); } } return status; } */ /* OSStatus CConverter::DoConvertToEncoding( TextEncoding unicodeEncoding, TextEncoding toEncoding, Handle srcH, Handle destH, ByteCount &inputRead, ByteCount &outputLen ) { OSStatus status; UnicodeToTextInfo unicodeToTextInfo; UnicodeMapping theMapping; ByteCount origOutputLen; ByteCount maxOutput = ::GetHandleSize(destH); ByteCount srcLen = ::GetHandleSize(srcH); inputRead=0; outputLen=0; // Resize our output buffer if necessary to the minimum buffer we recommended earlier if( maxOutput < kMinimumBufferSize ) { maxOutput=kMinimumBufferSize; StHandleUnlocker unlock(destH); ::SetHandleSize(destH, maxOutput); } origOutputLen=maxOutput; // Create a unicodeMapping holding the specified source encoding and the target unicode encoding theMapping.unicodeEncoding = unicodeEncoding; theMapping.otherEncoding = 
toEncoding; theMapping.mappingVersion = kUnicodeUseLatestMapping; // Create a conversion context from the specified mapping status=::CreateUnicodeToTextInfo( &theMapping, &unicodeToTextInfo); if( status != noErr) return status; OptionBits convOptions = ( kUnicodeUseFallbacksMask | kUnicodeKeepInfoMask ) ; StHandleLocker theSLock(srcH); StHandleLocker theDLock(destH); char *src=*srcH; char *dest=*destH; // Call ConvertFromUnicodeToText as many times as necessary to convert the whole unicode stream. // The output handle will get resized if necessary to accomodate the conversion of the whole input stream do{ ByteCount tSourceRead, tUnicodeLen; status = ::ConvertFromUnicodeToText(unicodeToTextInfo, srcLen, (ConstUniCharArrayPtr) src, convOptions, 0, nil, nil, nil, maxOutput, &tSourceRead, &tUnicodeLen, (LogicalAddress)dest); //check to see if we need to adjust our output buffer size. See discussion of this in ConvertFromMulti method. if( (status == kTECOutputBufferFullStatus) || (status == kTECBufferBelowMinimumSizeErr) ) { ResizeOutputHandle( destH, &dest, origOutputLen ); dest=(char*)( (UInt32)dest + tUnicodeLen); maxOutput=origOutputLen+(maxOutput-tUnicodeLen); src=(char*)( (UInt32)src + tSourceRead); srcLen-=tSourceRead; } inputRead+=tSourceRead; outputLen+=tUnicodeLen; } while( (status == kTECOutputBufferFullStatus) || (status == kTECBufferBelowMinimumSizeErr) ); ::DisposeUnicodeToTextInfo( &unicodeToTextInfo ); ::HUnlock(srcH); ::HUnlock(destH); if(status == noErr) { Size newHSize = ::GetHandleSize(destH); if( outputLen < newHSize )//should be equal, but not always - so in this case clip the trailing trash { theDLock.Release(); ::SetHandleSize(destH, outputLen); } } return status; } */ //all alerts are done inside - except for unexpected throws - which will be caught outside OSStatus CConverter::ConvertClipboard(TextEncoding inputEncoding, TextEncoding outputEncoding, UInt16 lineBreak) { OSStatus status = noErr; Handle srcH = nil, destH = nil; ScrapFlavorType 
dataType = typeText; bool startWithUnicodeScrap = IsTwoByteUnicode( inputEncoding ); if( startWithUnicodeScrap )//2 byte unicode input encoding is selected as - use 'utxt' format first, then check 'TEXT' { dataType = typeUnicodeText; } ScrapRef currScrap = CarbonScrap::GetCurrentScrap(); SInt32 srcSize = CarbonScrap::GetDataSize(currScrap, dataType); bool hasData = (srcSize > 0); if( !hasData ) { if( startWithUnicodeScrap ) dataType = typeText; else dataType = typeUnicodeText; srcSize = CarbonScrap::GetDataSize(currScrap, dataType); hasData = (srcSize > 0); if( !hasData ) { StopAlertResID(10, errEmptyScrap);//Clipboard contains no text. return errEmptyScrap; } } srcH = ::NewHandle(srcSize); if(srcH == NULL) { StopAlertResID(11, nilHandleErr);//Not enough memory to perform conversion. return nilHandleErr; } AHandle srcHandle(srcH); // srcHandle.Lock(); // currScrap.GetFlavorData(dataType, srcSize, *srcH); srcSize = CarbonScrap::GetData(currScrap, dataType, srcH); if(srcSize == 0) { StopAlertResID(10, errEmptyScrap);//Clipboard contains no text. return errEmptyScrap; } destH = ::NewHandle(srcSize); if(destH == NULL) { StopAlertResID(11, nilHandleErr);//Not enough memory to perform conversion. 
return nilHandleErr; } AHandle destDel(destH); try { status = ConvertFromOneEncodingToAnother( inputEncoding, outputEncoding, srcH, destH); srcHandle.Dispose(); ByteCount destLen = ::GetHandleSize(destH); ByteCount destBuffSize = destLen; if(lineBreak == kLineBreakAutoChange) { TextEncodingBase theBase = ::GetTextEncodingBase(outputEncoding); if((theBase >= kStandardDOS) && (theBase < kStandardOther)) { destBuffSize = 2*destLen; ::SetHandleSize(destH, destBuffSize ); CThrownOSErr err = ::MemError(); } destDel.Lock(); AutoCorrectBreaks( (TextPtr)*destH, destLen, destBuffSize, outputEncoding); destDel.Unlock(); } else { if(lineBreak == kLineBreakWindows) { destBuffSize = 2*destLen; ::SetHandleSize(destH, destBuffSize ); CThrownOSErr err = ::MemError(); } destDel.Lock(); ChangeBreaks( (TextPtr)*destH, destLen, destBuffSize, outputEncoding, lineBreak); destDel.Unlock(); } if( destLen != ::GetHandleSize(destH) ) {//the buffer may be longer after line break change - cut it now ::SetHandleSize(destH, destLen ); CThrownOSErr err = ::MemError(); } } catch(...) { status = GetThrownOSStatus(); StopAlertResID(4, status );//Clipboard conversion failed. return status; } if(status == noErr) { if( IsTwoByteUnicode( outputEncoding ) ) dataType = typeUnicodeText; else dataType = typeText; destDel.Lock(); CarbonScrap::SetData( currScrap, dataType, *destH, ::GetHandleSize(destH), true ); } if(status != noErr) StopAlertResID(4, status );//Clipboard conversion failed. return status; } #pragma mark - //based on DoConvertUsingHLC from TEC 1.5 SDK sample code //Flushing is implemented inside, //so it is best to call this function for complete text (not for fragment) OSStatus CConverter::ConvertFromOneEncodingToAnother(TextEncoding inputEncoding, TextEncoding outputEncoding, Handle srcH, Handle destH) { if( (srcH == nil) || (destH == nil) ) return noErr;//or what? 
TECObjectRef encodingConverter; OSStatus status = ::TECCreateConverter( &encodingConverter, inputEncoding, outputEncoding); if(status != noErr) return status; ByteCount origOutputLen; ByteCount outputLen = 0; ByteCount maxOutput = ::GetHandleSize(destH); ByteCount srcLen = ::GetHandleSize(srcH); if(srcLen == 0) return noErr; /* Resize our output buffer if necessary to the minimum buffer we recommended earlier*/ if( maxOutput < kMinimumBufferSize ) { maxOutput = kMinimumBufferSize; ::HUnlock(destH); ::SetHandleSize(destH, maxOutput); } origOutputLen = maxOutput; ::HLock(srcH); ::HLock(destH); char *src = *srcH; char *dest = *destH; //always handle BOMs for 2 byte Unicode input encoding if( IsTwoByteUnicode( inputEncoding ) ) { if( (*(UniChar *)src) == kMacByteOrderChar) { src += sizeof(UniChar); srcLen -= sizeof(UniChar); } else if( (*(UniChar *)src) == kPCByteOrderChar ) { src += sizeof(UniChar); srcLen -= sizeof(UniChar); CUTextFileStream::InvertByteOrder( (UniChar *)src, srcLen/sizeof(UniChar) ); } } Boolean needsToFlush = true; do { ByteCount tSourceRead, tOutputLen; if( srcLen > 0 ) { status = ::TECConvertText( encodingConverter, (ConstStringPtr)src, srcLen, &tSourceRead, (StringPtr)dest, maxOutput, &tOutputLen); } else { status = ::TECFlushText( encodingConverter, (TextPtr) dest, maxOutput, &tOutputLen); needsToFlush = false; } if( (status == kTECOutputBufferFullStatus) || (status == kTECBufferBelowMinimumSizeErr) ) { ResizeOutputHandle( destH, &dest, origOutputLen ); maxOutput = origOutputLen + (maxOutput - tOutputLen); //It could happen that we set the needsToFlush flag to false prematurely, so reset it. 
if(needsToFlush == false ) needsToFlush = true; } else if( (status != noErr) && (status != kTECUsedFallbacksStatus) ) { break; } else maxOutput -= tOutputLen; outputLen += tOutputLen; dest = (char*)( (UInt32)dest + tOutputLen); if( srcLen > 0 ) { src = (char*)( (UInt32)src + tSourceRead); srcLen -= tSourceRead; } } while( (status == kTECOutputBufferFullStatus) || (status == kTECBufferBelowMinimumSizeErr) || needsToFlush ); ::HUnlock(srcH); ::HUnlock(destH); if( (status == noErr) || (status == kTECUsedFallbacksStatus) ) { Size newHSize = ::GetHandleSize(destH); if( outputLen < newHSize )//should be equal, but not always - so in this case clip the trailing trash ::SetHandleSize(destH, outputLen); } status = ::TECDisposeConverter(encodingConverter); return status; } #pragma mark - bool CConverter::IsTwoByteUnicode( TextEncoding inEnc ) { TextEncodingBase encBase = ::GetTextEncodingBase( inEnc ); if( (encBase >= kStandardUnicode) && (encBase < kStandardISO) ) { return (::GetTextEncodingFormat( inEnc ) == kUnicode16BitFormat); } return false; } /* //function returns kMacByteOrderChar or kPCByteOrderChar UniChar CConverter::GuessByteOrder(UniCharPtr uniTextPtr, UniCharCount uniTextLen) { UniChar uniChar = kMacByteOrderChar; UInt32 index = 0, hiZeroCount = 0, loZeroCount = 0; while(index < uniTextLen) { uniChar = uniTextPtr[index]; index++; if( HiByte(uniChar) == 0 ) hiZeroCount++; if( LoByte(uniChar) == 0 ) loZeroCount++; if( (index >10) && (hiZeroCount != loZeroCount) ) //gather at least 10 chars { uniChar = (loZeroCount > hiZeroCount) ? 
kPCByteOrderChar : kMacByteOrderChar; break; } } return uniChar; } */ /* Boolean CConverter::ImportTextToUnicode(FSSpec *inOriginalDocSpec, TextEncoding inOriginalEncoding, Handle &outHandle) { OSStatus status = noErr; Handle localFromText = nil; ByteCount inputRead = 0; ByteCount outputLen = 0; bool isTwoByteUnicode = IsTwoByteUnicode( inOriginalEncoding ); if( isTwoByteUnicode ) localFromText = CUTextFileStream::OpenUTextFile( *inOriginalDocSpec ); else localFromText = CUTextFileStream::OpenTextFile( *inOriginalDocSpec ); if(localFromText == nil) goto CleanUpAndExit; ByteCount toTextSize = ::GetHandleSize(localFromText); //unicode will be roughly 2 times bigger //in normal case - handle size is adjusted UniCharCount uniTextLen = toTextSize/sizeof(UniChar); if( isTwoByteUnicode ) {//it is Unicode 16bit - our format - no need for conversion outHandle = localFromText; localFromText = nil;//prevent disposing of newly aquired mem area } else {//original text is not Unicode 16 bit toTextSize += sizeof(UniChar); outHandle = ::NewHandleClear(toTextSize); if(outHandle == nil) goto CleanUpAndExit; TextEncoding unicodeEncoding = ::ResolveDefaultTextEncoding(kTextEncodingUnicodeDefault); status = CConverter::DoConvertToUnicode( unicodeEncoding, inOriginalEncoding, localFromText, outHandle, inputRead, outputLen); } if(localFromText != nil) { ::DisposeHandle(localFromText); localFromText = nil; } if( (status == noErr) || (status == kTECUsedFallbacksStatus) ) return true; CleanUpAndExit: SignalStringLiteral_("Unicode conversion failed"); if(localFromText != nil) { ::DisposeHandle(localFromText); localFromText = nil; } if(outHandle != nil) { ::DisposeHandle(outHandle); outHandle = nil; } return false; } */ /* Boolean CConverter::OpenConvertSave(TextEncoding inputEncoding, TextEncoding outputEncoding, FSSpec *inOrigSpec) { Handle localFromText = nil; try { if( IsTwoByteUnicode( inputEncoding ) ) localFromText = CUTextFileStream::OpenUTextFile( *inOrigSpec ); else localFromText = 
CUTextFileStream::OpenTextFile( *inOrigSpec ); } catch(const LException& inException) { SignalStringLiteral_("Error loading text file. Too little memory?"); goto CleanupNoSignal; } if(localFromText == nil) goto CleanUpAndExit; Handle outputHandle = nil; OSStatus status = noErr; if(inputEncoding == outputEncoding) { outputHandle = localFromText; localFromText = nil;//prevent disposing of newly aquired mem area } else { outputHandle = ::NewHandleClear( ::GetHandleSize(localFromText) );//allocate the same size if(outputHandle == nil) goto CleanUpAndExit; try { status = ConvertFromOneEncodingToAnother(inputEncoding, outputEncoding, localFromText, outputHandle); } catch(const LException& inException) { goto CleanUpAndExit; } } if(localFromText != nil) { ::DisposeHandle(localFromText); localFromText = nil; } if( (status == noErr) || (status == kTECUsedFallbacksStatus) ) { Boolean replacing = false; FSSpec newFileSpec; PP_StandardDialogs::LFileDesignator designator; OSType theFileType = kTextFileType; if( IsTwoByteUnicode( outputEncoding ) ) theFileType = kUniTextFileType; designator.SetFileType( theFileType ); NavDialogOptions* options = designator.GetDialogOptions(); if(options != nil) { options->dialogOptionFlags = kNavDefaultNavDlogOptions | kNavNoTypePopup | kNavDontAutoTranslate; ::GetIndString( options->windowTitle, STRx_Standards, 2);//save file as } Boolean outValue = designator.AskDesignateFile( inOrigSpec->name ); if (outValue) { designator.GetFileSpec(newFileSpec); replacing = designator.IsReplacing(); if(replacing) { status = ::FSpDelete(&newFileSpec);//delete it if( (status != noErr) && (status != fnfErr) ) { ::DisposeHandle(outputHandle); outputHandle = nil; return false;//could not remove file! 
} } LFileStream *newFile = nil; if( IsTwoByteUnicode( outputEncoding ) ) {//unicode newFile = NEW CUTextFileStream(newFileSpec); } else { newFile = NEW LFileStream(newFileSpec); } //LFile newFile( newFileSpec ); try { newFile->CreateNewDataFile( kAppSignature, theFileType ); newFile->OpenDataFork( fsRdWrPerm ); StHandleLocker theLock( outputHandle ); newFile->WriteDataFork( *outputHandle, ::GetHandleSize(outputHandle) ); //newFile->CloseDataFork(); - destructor will close the file } catch(const LException& inException) { status = ::FSpDelete(&newFileSpec); SignalStringLiteral_("Could not write the file."); outValue = false; } delete newFile; } ::DisposeHandle(outputHandle); outputHandle = nil; return outValue; } CleanUpAndExit: SignalStringLiteral_("Conversion failed!"); CleanupNoSignal: if(localFromText != nil) { ::DisposeHandle(localFromText); localFromText = nil; } if(outputHandle != nil) { ::DisposeHandle(outputHandle); outputHandle = nil; } return false; } */ /* Boolean CConverter::ExportUnicodeToEncodingText(Handle origHandle, Str255 defaultName, TextEncoding textEncoding) { Assert_(origHandle); Boolean outValue = true; Handle newH = nil; Handle saveTextH = nil; ByteCount saveTextLen = 0; ByteCount origSize = ::GetHandleSize(origHandle); OSType theFileType = '????'; OSStatus status = noErr; if( IsTwoByteUnicode( textEncoding ) )//the same as original - simple save-as { saveTextH = origHandle; saveTextLen = origSize; theFileType = kUniTextFileType; } else { newH = ::NewHandleClear( origSize/2 );//two times smaller roughly - will be adjusted by DoConvertToEncoding() anyway if(newH == nil) return false; ByteCount inputRead; TextEncoding unicodeEncoding = ::ResolveDefaultTextEncoding(kTextEncodingUnicodeDefault); status = CConverter::DoConvertToEncoding( unicodeEncoding, textEncoding, origHandle, newH, inputRead, saveTextLen); if( (status != noErr) && (status != kTECUsedFallbacksStatus) ) { ::DisposeHandle(newH); return false; } saveTextH = newH; theFileType = 
kTextFileType; } if( (status == noErr) || (status == kTECUsedFallbacksStatus) ) { Boolean replacing = false; FSSpec newFileSpec; PP_StandardDialogs::LFileDesignator designator; designator.SetFileType( theFileType ); NavDialogOptions* options = designator.GetDialogOptions(); if(options != nil) { options->dialogOptionFlags = kNavDefaultNavDlogOptions | kNavNoTypePopup | kNavDontAutoTranslate; ::GetIndString( options->windowTitle, STRx_Standards, 2);//save file as } outValue = designator.AskDesignateFile( defaultName ); if (outValue) { designator.GetFileSpec(newFileSpec); replacing = designator.IsReplacing(); if(replacing) { status = ::FSpDelete(&newFileSpec);//delete it if( (status != noErr) && (status != fnfErr) ) { ::DisposeHandle(newH); newH = nil; return false;//could not remove file! } } LFileStream *newFile = nil; if( IsTwoByteUnicode( textEncoding ) ) {//unicode newFile = NEW CUTextFileStream(newFileSpec); } else { newFile = NEW LFileStream(newFileSpec); } newFile->CreateNewDataFile( kAppSignature, kTextFileType ); newFile->OpenDataFork( fsRdWrPerm ); StHandleLocker theLock( saveTextH ); newFile->WriteDataFork( *saveTextH, saveTextLen ); newFile->CloseDataFork(); delete newFile; } } if(newH != nil) ::DisposeHandle(newH); return outValue; } */

#pragma mark === end old routines ===
#pragma mark -

// ---------------------------------------------------------------------------------
//		• GetAllEncodingMappings
//
//	(Re)builds sAllTextEncodingsArr: a sorted list of all text encodings the Text
//	Encoding Converter reports as available. After sorting, every item up to and
//	including kTextEncodingMultiRun (when it is in the list) is removed.
//
//	Returns true when the list was built. On failure false is returned; if a
//	previous list had already been deleted, sAllTextEncodingsArr is nil.
// ---------------------------------------------------------------------------------
Boolean
CConverter::GetAllEncodingMappings()
{
	UInt32 mappingCount = 0;
	OSStatus status = ::TECCountAvailableTextEncodings( &mappingCount);
	if( (status != noErr) || (mappingCount == 0) )
		return false;

	// Drop the old list and clear the pointer, so that a failure below
	// cannot leave a dangling pointer behind (it would be deleted a second time later)
	delete sAllTextEncodingsArr;
	sAllTextEncodingsArr = nil;

	TextEncoding **allTextEncodingsH = (TextEncoding **) ::NewHandle( mappingCount * sizeof(TextEncoding) );
	if( allTextEncodingsH == nil )
		return false;

	::HLock( (Handle)allTextEncodingsH );
	status = ::TECGetAvailableTextEncodings( *allTextEncodingsH, mappingCount, &mappingCount);
	::HUnlock( (Handle)allTextEncodingsH );

	if( (status != noErr) || (mappingCount == 0) )
	{
		//if we fail:
		::DisposeHandle( (Handle)allTextEncodingsH );
		return false;
	}

	// fewer encodings returned than counted - shrink the handle to the real item count
	if( mappingCount < ( ::GetHandleSize( (Handle)allTextEncodingsH )/sizeof(TextEncoding) ) )
		::SetHandleSize( (Handle)allTextEncodingsH, mappingCount * sizeof(TextEncoding) );

	sAllTextEncodingsArr = new LArray( sizeof(TextEncoding), (Handle)allTextEncodingsH, new CEncValueComparator(), false, true);
	sAllTextEncodingsArr->Sort();

	TextEncoding unwanted = kTextEncodingMultiRun;
	ArrayIndexT indx = sAllTextEncodingsArr->FetchIndexOf( &unwanted );
	if( indx != LArray::index_Bad )
	{
		sAllTextEncodingsArr->RemoveItemsAt(1, indx);
	}

/*
	//internet names test:
	for (ArrayIndexT indx = 1; indx < sAllTextEncodingsArr->GetCount(); indx++)
	{
		TextEncoding someEnc;
		Str255 encName;
		sAllTextEncodingsArr->FetchItemAt(indx, &someEnc);
		OSStatus stat = ::TECGetTextEncodingInternetName( someEnc, encName );
		if(stat == kTextUnsupportedEncodingErr)
		{
			SignalStringLiteral_("Cannot get internet name for specified encoding");
		}
		else
		{
			Assert_(stat == noErr);
		}
	}
*/
	return true;
}

// ---------------------------------------------------------------------------------
//		• GetSpecifiedEncodingsRange
//
//	Finds the range of array indexes in inEncArr that belongs to one encoding
//	standard (kStandardMac, kStandardUnicode, ...).
//	sorted array of encodings must be given on input
//
//	outStart/outEnd receive the first and last index (see FindIndexRange), or 0
//	for an unknown standard.
// ---------------------------------------------------------------------------------
void
CConverter::GetSpecifiedEncodingsRange(LArray *inEncArr, TextEncoding inStandard, UInt32 &outStart, UInt32 &outEnd)
{
	// First translate the standard into a range of encoding values...
	outStart = inStandard;
	switch(inStandard)
	{
		case kStandardMac:
			outEnd = kStandardUnicode-1;
		break;

		case kStandardUnicode:
			outEnd = kStandardISO-1;
		break;

		case kStandardISO:
			outEnd = kStandardDOS-1;
		break;

		case kStandardDOS:
			outEnd = kStandardWindows-1;
		break;

		case kStandardWindows:
			outStart = kTextEncodingDOSThai;//yes! - Windows uses some DOS encodings - look description
			outEnd = kStandardOther-1;
		break;

		case kStandardOther:
			outEnd = 0xFFFF;
		break;

		default:
			outStart = outEnd = 0;
		break;
	}

	// ...then into a range of array indexes
	if(outStart != outEnd)
	{
		FindIndexRange(inEncArr, outStart, outEnd);
	}
}

// ---------------------------------------------------------------------------------
//		• FindIndexRange
//
//	On input outStart/outEnd hold a range of encoding values; on output they hold
//	the index of the first array item >= the start value and the index of the last
//	array item <= the end value. Both are set to LArray::index_Bad when no item
//	falls into the range (including an empty array).
//	sorted array of encodings must be given on input
// ---------------------------------------------------------------------------------
void
CConverter::FindIndexRange(LArray *inEncArr, UInt32 &outStart, UInt32 &outEnd)
{
	TextEncoding encStart = outStart, encEnd = outEnd;

	const ArrayIndexT itemCount = inEncArr->GetCount();
	if( itemCount < LArray::index_First )
	{	// empty array - there is no valid item to compare with
		outStart = outEnd = LArray::index_Bad;
		return;
	}

	CEncValueComparator *comp = CEncValueComparator::GetComparator();
	ArrayIndexT left = LArray::index_First;
	ArrayIndexT right = itemCount;
	SInt32 comparison = 0;
	SInt32 current = 0;

	//find bigger or equal to encStart
	while(left <= right)
	{
		current = (left + right)/2;
		comparison = comp->Compare( inEncArr->GetItemPtr(current), &encStart );
		if(comparison == 0)
			break;
		else if (comparison > 0)
			right = current - 1;
		else
			left = current + 1;
	}

	//now we are somewhere near - check last comparison
	if( comparison < 0 )
		current++;

	if( current > itemCount )
	{	// every item is smaller than encStart - nothing in range
		outStart = outEnd = LArray::index_Bad;
		return;
	}
	outStart = current;

	right = itemCount;
	left = current;
	comparison = 0;

	//find less or equal to encEnd
	while(left <= right)
	{
		current = (left + right)/2;
		comparison = comp->Compare( inEncArr->GetItemPtr(current), &encEnd );
		if(comparison == 0)
			break;
		else if (comparison > 0)
			right = current - 1;
		else
			left = current + 1;
	}

	//now we are somewhere near - check last comparison
	if( comparison > 0 )
		current--;

	outEnd = current;

	if(outStart > outEnd)
		outStart = outEnd = LArray::index_Bad;
}

// ---------------------------------------------------------------------------------
//		• DisposeAllEncodingMappings
//
//	Deletes the list built by GetAllEncodingMappings() and clears the pointer.
// ---------------------------------------------------------------------------------
void
CConverter::DisposeAllEncodingMappings()
{
	delete sAllTextEncodingsArr;	// deleting a nil pointer does nothing
	sAllTextEncodingsArr = nil;
}

// ---------------------------------------------------------------------------------
//		FetchEncodingNamePart (file private helper)
//
//	Asks the Text Encoding Converter for one part (base, variant or format name)
//	of the name of inEncoding and stores it in outName as a Pascal string.
//	The name is only accepted when it comes back as Mac Roman or US ASCII text;
//	otherwise outName is set to the empty string.
//
//	Returns true when a non-empty name was stored.
// ---------------------------------------------------------------------------------
static Boolean
FetchEncodingNamePart( TextEncoding inEncoding, TextEncodingNameSelector inSelector, Str255 outName )
{
	ByteCount outputLen = 0;
	TextEncoding nameEncoding;
	RegionCode actualRegion;

	// The text goes to outName + 1, so only sizeof(Str255) - 1 bytes are available
	OSStatus status = ::GetTextEncodingName( inEncoding, inSelector, verUS, kTextEncodingUS_ASCII,
											sizeof(Str255) - 1, &outputLen, &actualRegion, &nameEncoding,
											(TextPtr) outName + 1 );

	const bool nameIsUsable = (status == noErr) &&
						( (nameEncoding == kTextEncodingMacRoman) || (nameEncoding == kTextEncodingUS_ASCII) );
	if( !nameIsUsable )
	{
		outName[0] = 0;
		return false;
	}

	if( outputLen > sizeof(Str255) - 1 )
		outputLen = sizeof(Str255) - 1;	// length byte of a Pascal string cannot hold more

	outName[0] = (UInt8) outputLen;
	return (outputLen > 0);
}

// ---------------------------------------------------------------------------------
//		• GetEncodingName
//
//	Returns one part of the name of inEncoding in outName (Pascal string):
//	the base name, the variant name or the format name, as chosen by inSelector.
//	The application's own name tables are searched first, then the Text Encoding
//	Converter is asked.
//
//	 - base name: always handled; an unknown encoding gets a generated name
//	   (see VerifyAndCleanBaseName)
//	 - variant/format name: an unknown default variant/format is handled with an
//	   empty name
//
//	Returns false when the encodings list has not been built, when inSelector is
//	not one of the three selectors above, or when no name was found.
// ---------------------------------------------------------------------------------
Boolean
CConverter::GetEncodingName(TextEncoding inEncoding, TextEncodingNameSelector inSelector, Str255 outName)
{
	if( sAllTextEncodingsArr == nil )
		return false;

	TextEncodingBase theBase = ::GetTextEncodingBase(inEncoding);

	// Which standard does the base belong to?
	// Anything outside the known ranges is treated as "other" (never left uninitialized).
	TextEncodingBase standard = kStandardOther;
	if( (theBase >= kStandardMac) && (theBase < kStandardUnicode) )
		standard = kStandardMac;
	else if( (theBase >= kStandardUnicode) && (theBase < kStandardISO) )
		standard = kStandardUnicode;
	else if( (theBase >= kStandardISO) && (theBase < kStandardDOS) )
		standard = kStandardISO;
	else if( (theBase >= kStandardDOS) && (theBase < kStandardWindows) )
		standard = kStandardDOS;
	else if( (theBase >= kStandardWindows) && (theBase < kStandardOther) )
		standard = kStandardWindows;

	Boolean isHandled = false;

	switch (inSelector)
	{
		case kTextEncodingBaseName:
		{
			isHandled = FindEncodingName(theBase, standard, outName);
			if( !isHandled )
			{
				FetchEncodingNamePart( inEncoding, kTextEncodingBaseName, outName );
				VerifyAndCleanBaseName(outName, inEncoding);	// supplies a name when TEC had none
				isHandled = true;
			}
		}
		break;

		case kTextEncodingVariantName:
		{
			TextEncodingVariant theVar = ::GetTextEncodingVariant(inEncoding);
			if(standard == kStandardUnicode)
				theBase = kTextEncodingUnicodeDefault;	// all Unicode bases share one variants table

			isHandled = FindEncodingVariantName(theBase, theVar, outName);
			if( !isHandled )
				isHandled = FetchEncodingNamePart( inEncoding, kTextEncodingVariantName, outName );

			if( !isHandled && (theVar == kTextEncodingDefaultVariant) )
			{
				outName[0] = 0;
				isHandled = true;
			}
		}
		break;

		case kTextEncodingFormatName:
		{
			TextEncodingFormat theForm = ::GetTextEncodingFormat(inEncoding);
			if(standard == kStandardUnicode)
				theBase = kTextEncodingUnicodeDefault;	// all Unicode bases share one formats table

			isHandled = FindEncodingFormatName(theBase, theForm, outName);
			if( !isHandled )
				isHandled = FetchEncodingNamePart( inEncoding, kTextEncodingFormatName, outName );

			if( !isHandled && (theForm == kTextEncodingDefaultFormat) )
			{
				outName[0] = 0;
				isHandled = true;
			}
		}
		break;
	}

	return isHandled;
}

// ---------------------------------------------------------------------------------
//		• FindEncodingName
//
//	Looks up inBase in the 'MapL' resource with ID kEncodingsBaseResID +
//	standardSelector (an array of UInt32 values searched with a binary search) and,
//	when found, reads the string with the same position from the string list
//	with the same resource ID.
//
//	Returns false when the resource is missing or inBase is not in it.
// ---------------------------------------------------------------------------------
Boolean
CConverter::FindEncodingName(TextEncodingBase inBase, TextEncodingBase standardSelector, Str255 outName)
{
	StResource encodingsMap( 'MapL', kEncodingsBaseResID + standardSelector, false, false );
	if( !encodingsMap.IsValid() )
		return false;//we did not handle this encoding

	StHandleLocker theLock( (Handle)encodingsMap );
	UInt32 count = ::GetHandleSize( (Handle)encodingsMap ) / sizeof(UInt32);
	SInt32 foundIndex = EncodingBinarySearch(inBase, (TextEncoding *)*(Handle)encodingsMap, count);
	if( foundIndex == -1 )
		return false;//we did not handle this encoding

	::GetIndString( outName, kEncodingsBaseResID + standardSelector, foundIndex + 1);	// string indexes are 1-based
	return true;
}

// ---------------------------------------------------------------------------------
//		• FindEncodingVariantName
//
//	Same lookup as FindEncodingName, for variant inVariant in the resources with
//	ID kVariantsStartResID + inBase.
// ---------------------------------------------------------------------------------
Boolean
CConverter::FindEncodingVariantName(TextEncodingBase inBase, TextEncodingVariant inVariant, Str255 outName)
{
	StResource varsMap( 'MapL', kVariantsStartResID + inBase, false, false );
	if( !varsMap.IsValid() )
		return false;//we did not found this variant name

	StHandleLocker theLock( (Handle)varsMap );
	UInt32 count = ::GetHandleSize( (Handle)varsMap ) / sizeof(UInt32);
	SInt32 foundIndex = EncodingBinarySearch(inVariant, (TextEncoding *)*(Handle)varsMap, count);
	if( foundIndex == -1 )
		return false;//we did not found this variant name

	::GetIndString( outName, kVariantsStartResID + inBase, foundIndex + 1);
	return true;
}

// ---------------------------------------------------------------------------------
//		• FindEncodingFormatName
//
//	Same lookup as FindEncodingName, for format inForm in the resources with
//	ID kFormatsStartResID + inBase.
// ---------------------------------------------------------------------------------
Boolean
CConverter::FindEncodingFormatName(TextEncodingBase inBase, TextEncodingFormat inForm, Str255 outName)
{
	StResource formsMap( 'MapL', kFormatsStartResID + inBase, false, false );
	if( !formsMap.IsValid() )
		return false;//we did not found this format name

	StHandleLocker theLock( (Handle)formsMap );
	UInt32 count = ::GetHandleSize( (Handle)formsMap ) / sizeof(UInt32);
	SInt32 foundIndex = EncodingBinarySearch(inForm, (TextEncoding *)*(Handle)formsMap, count);
	if( foundIndex == -1 )
		return false;//we did not found this format name

	::GetIndString( outName, kFormatsStartResID + inBase, foundIndex + 1);
	return true;
}

// ---------------------------------------------------------------------------------
//		• EncodingBinarySearch
//
//	Binary search for inEncoding in mapPtr[0 .. itemCount-1], which must be sorted
//	in ascending order. Returns the zero-based index of the item, or -1.
//
//	Values are compared directly: subtracting two unsigned values and looking at
//	the sign of the result gives the wrong order when they differ by 2^31 or more.
// ---------------------------------------------------------------------------------
SInt32
CConverter::EncodingBinarySearch(TextEncoding inEncoding, TextEncoding *mapPtr, UInt32 itemCount)
{
	SInt32 left = 0;
	SInt32 right = (SInt32) itemCount - 1;

	while (left <= right)
	{
		const SInt32 current = left + (right - left) / 2;
		const TextEncoding candidate = mapPtr[current];

		if (candidate == inEncoding)
			return current;

		if (candidate > inEncoding)
			right = current - 1;
		else
			left = current + 1;
	}

	return -1;
}

// ---------------------------------------------------------------------------------
//		• EncodingBinarySearchByBase
//
//	Like EncodingBinarySearch, but only the encoding base of each item is compared
//	with the base of inEncoding.
//	if base is the same - this is the item we want
// ---------------------------------------------------------------------------------
SInt32
CConverter::EncodingBinarySearchByBase(TextEncoding inEncoding, TextEncoding *mapPtr, UInt32 itemCount)
{
	const TextEncodingBase theBase = ::GetTextEncodingBase(inEncoding);
	SInt32 left = 0;
	SInt32 right = (SInt32) itemCount - 1;

	while (left <= right)
	{
		const SInt32 current = left + (right - left) / 2;
		const TextEncodingBase candidateBase = ::GetTextEncodingBase( mapPtr[current] );

		if (candidateBase == theBase)
			return current;

		if (candidateBase > theBase)
			right = current - 1;
		else
			left = current + 1;
	}

	return -1;
}

// ---------------------------------------------------------------------------------
//		• VerifyAndCleanBaseName
//
//	called after the base name obtained from TEC
//	When ioName is empty it is replaced with string 17 of rMiscStrings followed by
//	the encoding value in hexadecimal, e.g. "Unknown (0x00000100)".
//	The hex digits always show the numeric value (most significant byte first),
//	independent of the byte order of the processor.
// ---------------------------------------------------------------------------------
void
CConverter::VerifyAndCleanBaseName(Str255 ioName, TextEncoding inEncoding)
{
	LStringRef theName( sizeof(Str255), ioName );
	if(ioName[0] == 0)//force some name with hex code decription
	{
		::GetIndString(ioName, rMiscStrings, 17);//"\pUnknown"
		Assert_(ioName[0] != 0);
		theName += " (0x";

		// Hex-dumping the variable's memory would print the bytes reversed on a
		// little-endian processor, so take the bytes out of the value explicitly
		const unsigned char encodingBytes[4] =
		{
			(unsigned char)( (inEncoding >> 24) & 0xFF ),
			(unsigned char)( (inEncoding >> 16) & 0xFF ),
			(unsigned char)( (inEncoding >> 8) & 0xFF ),
			(unsigned char)( inEncoding & 0xFF )
		};

		ByteCount hexLen;
		Str255 hexNumberRep;
		BufToHex( encodingBytes, (unsigned char*) hexNumberRep+1, sizeof(encodingBytes), hexLen, 0);
		hexNumberRep[0] = (unsigned char)hexLen;
		theName += hexNumberRep;
		theName += ")";
	}
	else
	{
		;//get rid of "(Mac OS)" or "(ISO)" or "(DOS)" here
	}
}

#pragma mark -
#pragma mark = Sniffing =

// ---------------------------------------------------------------------------------
//		• SniffFileContent
//
//	Reads up to the first 1024 bytes of the data fork of inRef and lets
//	SniffContent() guess their text encoding.
//
//	Returns the result of SniffContent(); false (without any alert) when the file
//	cannot be opened or read or when there is no memory for the buffer.
// ---------------------------------------------------------------------------------
Boolean
CConverter::SniffFileContent(const FSRef &inRef, TextEncoding &suggestedEncoding)
{
	CUTextFileStream theFile(inRef, fsRdPerm); //destructor will close the fork if needed

	try
	{
		theFile.OpenDataForkForReading(kTextIsNOTUnicode);//read everything - if unicode byte order is in front it is OK
	}
	catch(...)
	{
		//keep silent - it will show up later
		return false;
	}

	//allocate buffer:
	ByteCount srcLen = 1024;//1k for test should be enough
	Handle buffH = ::NewHandleClear(srcLen);
	if( buffH == nil )
		return false;

	AHandle block(buffH);//destructors will clean-up
	block.Lock();
	char *src = *buffH;

	try
	{
		srcLen = theFile.Read(srcLen, src);	// srcLen becomes the number of bytes really read
	}
	catch(...)
	{
		return false;//no need for alert
	}

	//OSStatus status = theFile.GetBytes(src, srcLen);
	//if( ((status != noErr) && (status != eofErr)) || (srcLen == 0) )//if no data available
	//	return false;//not need for alert

	return SniffContent( (TextPtr)src, srcLen, suggestedEncoding);
}

// ---------------------------------------------------------------------------------
//		• SniffClipboardContent
//
//	Lets SniffContent() guess the encoding of the text on the current scrap.
//	The 'TEXT' flavor is used when present, otherwise 'utxt'.
//
//	Returns the result of SniffContent(); false (without any alert) when the scrap
//	holds no text, when memory is short or when anything throws.
// ---------------------------------------------------------------------------------
bool
CConverter::SniffClipboardContent(TextEncoding &suggestedEncoding)
{
	bool isOK = false;

	try
	{
		ScrapFlavorType dataType = typeText;
		ScrapRef currScrap = CarbonScrap::GetCurrentScrap();
		SInt32 srcSize = CarbonScrap::GetDataSize(currScrap, dataType);
		if( srcSize <= 0 )
		{
			dataType = typeUnicodeText;
			srcSize = CarbonScrap::GetDataSize(currScrap, dataType);
			if( srcSize <= 0 )
				return false;//no need to alert now - it will show up later
		}

		Handle srcH = ::NewHandle(srcSize);
		if(srcH == NULL)
			return false;	// out of memory - same silent result as any other failure here

		AHandle theDel(srcH);
		srcSize = CarbonScrap::GetData(currScrap, dataType, srcH);

		// SniffContent allocates handles itself, so the block must not move
		// while its dereferenced address is in use
		theDel.Lock();
		isOK = SniffContent( (TextPtr)*srcH, srcSize, suggestedEncoding);
	}
	catch(...)
	{
		// keep silent - isOK stays false
	}

	return isOK;
}

// ---------------------------------------------------------------------------------
//		• SniffContent
//
//	Runs all available Text Encoding Converter sniffers over inputBuffer.
//
//	inputBuffer, inputBufferLength	- text to examine
//	suggestedEncoding				- out: first encoding in the list handed back by
//									  TECSniffTextEncoding (per the TEC documentation
//									  the list is ordered best match first); only
//									  set when sniffing itself succeeded
//
//	Returns true when sniffing succeeded and the suggested encoding has at least
//	as many features as errors; false otherwise.
// ---------------------------------------------------------------------------------
bool
CConverter::SniffContent( TextPtr inputBuffer, ByteCount inputBufferLength, TextEncoding &suggestedEncoding)
{
	ItemCount maxNumberOfSniffers = 0;
	OSStatus status = ::TECCountAvailableSniffers( &maxNumberOfSniffers );
	if( (status != noErr) || (maxNumberOfSniffers == 0) )
		return false;

	Handle sniffersH = ::NewHandleClear( maxNumberOfSniffers * sizeof(TextEncoding) );
	if( sniffersH == nil )
		return false;

	bool sniffedOK = false;
	ItemCount numberOfSniffers = 0;

	::HLock(sniffersH);
	status = ::TECGetAvailableSniffers( (TextEncoding *)*sniffersH, maxNumberOfSniffers, &numberOfSniffers);

	// numberOfSniffers must not be zero: the first item of each list is read below
	if( (status == noErr) && (numberOfSniffers > 0) )
	{
		if(numberOfSniffers < maxNumberOfSniffers)
		{
			StHandleUnlocker unlock(sniffersH);
			::SetHandleSize(sniffersH, numberOfSniffers * sizeof(TextEncoding));
		}

		TECSnifferObjectRef encodingSniffer;
		status = ::TECCreateSniffer( &encodingSniffer, (TextEncoding *)*sniffersH, numberOfSniffers);//check all available formats
		if(status == noErr)
		{
			// one error counter and one feature counter per encoding
			Handle errorsH = ::NewHandleClear(numberOfSniffers * sizeof(ItemCount) );
			if(errorsH)
			{
				AHandle errs(errorsH);
				errs.Lock();

				Handle featuresH = ::NewHandleClear(numberOfSniffers * sizeof(ItemCount) );
				if(featuresH)
				{
					AHandle feature(featuresH);
					feature.Lock();

					// 10 and 30 are the maxErrs and maxFeatures arguments
					status = ::TECSniffTextEncoding( encodingSniffer, inputBuffer, inputBufferLength,
													(TextEncoding *)*sniffersH, numberOfSniffers,
													(ItemCount *)*errorsH, 10,
													(ItemCount *)*featuresH, 30);
					if(status == noErr)
					{
						suggestedEncoding = ( (TextEncoding *)*sniffersH )[0];
						sniffedOK = ( ((ItemCount *)(*featuresH))[0] >= ((ItemCount *)(*errorsH))[0] );
					}
				}
			}
			status = ::TECDisposeSniffer(encodingSniffer);
		}
	}

	::DisposeHandle(sniffersH);
	return sniffedOK;
}

#pragma mark -
#pragma mark = Line breaks correction =

// ---------------------------------------------------------------------------------
//		• AutoCorrectBreaks
//
//	Changes the line breaks in buf to the kind that goes with inEncoding:
//	CR for Mac encodings, U+2029 for 16 bit Unicode, LF for ISO encodings and
//	CR LF for DOS and Windows encodings. Other Unicode formats and "other"
//	encodings are left unchanged.
//
//	buf			- text to change in place
//	len			- in/out: length of the text in bytes
//	buffSize	- size of buf in bytes; passed on to ChangeToWindows, the only
//				  conversion called here that is given the buffer size
// ---------------------------------------------------------------------------------
void
CConverter::AutoCorrectBreaks(TextPtr buf, ByteCount &len, ByteCount buffSize, TextEncoding inEncoding)
{
	const TextEncodingBase theBase = ::GetTextEncodingBase(inEncoding);

	if((theBase >= kStandardMac) && (theBase < kStandardUnicode))
	{
		ChangeToMac(buf, len); //CR
	}
	else if((theBase >= kStandardUnicode) && (theBase < kStandardISO))
	{
		// UTF-8, UTF-7 and 32 bit Unicode are not changed
		if( ::GetTextEncodingFormat(inEncoding) == kUnicode16BitFormat )
		{
			UniChangeToUnicode(buf, len); //U+2029 for 16 bit Unicode
		}
	}
	else if((theBase >= kStandardISO) && (theBase < kStandardDOS))
	{
		ChangeToUnix(buf, len); //LF
	}
	else if((theBase >= kStandardDOS) && (theBase < kStandardOther))
	{
		ChangeToWindows(buf, len, buffSize); //DOS and Windows are the same = CRLF
	}
	//kStandardOther and above: no change
}

// ---------------------------------------------------------------------------------
//		• ChangeBreaks
//
//	Changes the line breaks in buf to the kind selected by inBreak, using the
//	16 bit ("Uni...") routines when inEncoding is a 2 byte Unicode encoding.
//	kLineBreakUnicode is only applied to 2 byte Unicode text; kLineBreakNoChange
//	and unknown values do nothing.
//
//	buf			- text to change in place
//	len			- in/out: length of the text in bytes
//	buffSize	- size of buf in bytes; only passed to the Windows conversions
// ---------------------------------------------------------------------------------
void
CConverter::ChangeBreaks(TextPtr buf, ByteCount &len, ByteCount buffSize, TextEncoding inEncoding, UInt16 inBreak)
{
	const bool isTwoByteUnicode = IsTwoByteUnicode( inEncoding );

	switch(inBreak)
	{
		case kLineBreakMac:
			if(isTwoByteUnicode)
				UniChangeToMac(buf, len);
			else
				ChangeToMac(buf, len);
		break;

		case kLineBreakUnix:
			if(isTwoByteUnicode)
				UniChangeToUnix(buf, len);
			else
				ChangeToUnix(buf, len);
		break;

		case kLineBreakWindows:
			if(isTwoByteUnicode)
				UniChangeToWindows(buf, len, buffSize);
			else
				ChangeToWindows(buf, len, buffSize);
		break;

		case kLineBreakUnicode:
			if(isTwoByteUnicode)
				UniChangeToUnicode(buf, len);
		break;

		case kLineBreakNoChange:
		default:
		break;
	}
}

void
CConverter::ChangeToMac(TextPtr outBuf, ByteCount &inLen)//8-bit text - we look for LF and CRLF - the rest we are not interested in
{
	//ouput text may be shorter (from windows) or equal (from unix)
	//look for:
	// - LF - kUnixBreak
	// - CRLF - kWindowsBreak = kMacBreak + kUnixBreak
	Boolean prevChanged = false;
	//make sure the first item is not a unix break
	if(outBuf[0] == kUnixBreak)
	{
		outBuf[0] = kMacBreak;
		prevChanged = true;
	}
	for(UInt32 i = 1; i< inLen; i++)
	{
		if( outBuf[i] == kUnixBreak)
		{//something found
			if( !prevChanged && (outBuf[i-1] == kMacBreak) )//check for CR on prev pos, but skip it if it was changed last time!
{ //it is a windows break //shift data one char left to strip LF ::BlockMoveData( &outBuf[i+1], &outBuf[i], inLen - i -1 ); inLen--; i--;//must check this char again since me moved the data and now it is a different char } else//it is a unix break... { outBuf[i] = kMacBreak;//..or was } prevChanged = true; } else prevChanged = false; } } void CConverter::UniChangeToUnicode(TextPtr outBuf, ByteCount &inLen) //unicode text 16 bit { //output text will be shorter or equal //kUniMacBreak //kUniUnixBreak //windows break = kUniMacBreak + kUniUnixBreak UniCharPtr uText = (UniCharPtr)outBuf; UInt32 uLen = inLen / sizeof(UniChar); for(UInt32 i = 0; i< uLen; i++) { if( uText[i] == kUniMacBreak ) {//something found if( uText[i+1] == kUniUnixBreak )//check for 00 LF on next pos { //it is a converted windows break //write unicode break in place of 00 CR uText[i] = kUnicodeBreak; //and "eat" the next 00 LF (shift data one unichar left) ::BlockMoveData( &uText[i+2], &uText[i+1], (uLen - i - 2)*sizeof(UniChar) ); uLen--; } else//it is a converted mac break... { uText[i] = kUnicodeBreak;//...or was } } else if(uText[i] == kUniUnixBreak )//a stand-alone converted unix break { uText[i] = kUnicodeBreak; } } inLen = uLen*sizeof(UniChar); } void CConverter::ChangeToUnix(TextPtr outBuf, ByteCount &inLen)//8-bit text { //ouput text may be shorter (from windows) or equal (from mac) //look for: // - CR - kMacBreak // - CRLF - kWindowsBreak for(UInt32 i = 0; i< inLen; i++) { if( outBuf[i] == kMacBreak ) {//something found if( outBuf[i+1] == kUnixBreak )//check for LF on next pos { //it is a windows break //write unix break in place of CR outBuf[i] = kUnixBreak; //and "eat" the next LF (shift data one char left) ::BlockMoveData( &outBuf[i+2], &outBuf[i+1], inLen - i - 2 ); inLen--; } else//it is a mac break... 
{ outBuf[i] = kUnixBreak;//...or was } } } } void CConverter::ChangeToWindows(TextPtr outBuf, ByteCount &inLen, ByteCount buffSize)//8-bit text { #ifndef Debug_Signal #pragma unused (buffSize) #endif Assert_( (inLen*2) <= buffSize);//buffer must be long enough to hold the growing text //output text will be longer or equal //we look for // - CR - kMacBreak // - LF - kUnixBreak //but skip CRLF combination for(UInt32 i = 0; i< inLen; i++) { if( outBuf[i] == kMacBreak ) {//something found if( outBuf[i+1] == kUnixBreak )//check for LF on next pos { //it is a windows break so skip it i++; } else {//it is a mac break //first char ok //shift data one char right ::BlockMoveData( &outBuf[i+1], &outBuf[i+2], inLen - i - 1 ); //and insert LF outBuf[i+1] = kUnixBreak; inLen++; i++; } } else if( outBuf[i] == kUnixBreak )//unix break { //write CR in place of LF outBuf[i] = kMacBreak; //shift data one char right ::BlockMoveData( &outBuf[i+1], &outBuf[i+2], inLen - i - 1 ); //and insert LF outBuf[i+1] = kUnixBreak; inLen++; i++; } } } void CConverter::UniChangeToMac(TextPtr outBuf, ByteCount &inLen) { UniCharPtr uText = (UniCharPtr)outBuf; UInt32 uLen = inLen / sizeof(UniChar); //ouput text may be shorter (from windows) or equal (from unix) //look for: // - LF - kUniUnixBreak // - CRLF - kUniWindowsBreak = kUniMacBreak + kUniUnixBreak // - PS - kUnicodeBreak Boolean prevChanged = false; //make sure the first item is not a unix break if(uText[0] == kUniUnixBreak) { uText[0] = kUniMacBreak; prevChanged = true; } for(UInt32 i = 1; i< uLen; i++) { if( uText[i] == kUniUnixBreak) {//something found if( !prevChanged && (uText[i-1] == kMacBreak) )//check for CR on prev pos, but skip it if it was changed last time! { //it is a windows break //shift data one char left to strip LF ::BlockMoveData( &uText[i+1], &uText[i], (uLen - i -1)*sizeof(UniChar) ); uLen--; i--;//must check this char again since me moved the data and now it is a different char } else//it is a unix break... 
{ uText[i] = kUniMacBreak;//..or was } prevChanged = true; } else if( uText[i] == kUnicodeBreak ) { uText[i] = kUniMacBreak; prevChanged = false; } else prevChanged = false; } } void CConverter::UniChangeToUnix(TextPtr outBuf, ByteCount &inLen) { UniCharPtr uText = (UniCharPtr)outBuf; UInt32 uLen = inLen / sizeof(UniChar); //ouput text may be shorter (from windows) or equal (from mac) //look for: // - CR - kUniMacBreak // - CRLF - kUniWindowsBreak // - PS - kUnicodeBreak for(UInt32 i = 0; i< uLen; i++) { if( uText[i] == kUniMacBreak ) {//something found if( uText[i+1] == kUniUnixBreak )//check for LF on next pos { //it is a windows break //write unix break in place of CR uText[i] = kUniUnixBreak; //and "eat" the next LF (shift data one char left) ::BlockMoveData( &uText[i+2], &uText[i+1], (uLen - i - 2)*sizeof(UniChar) ); uLen--; } else//it is a mac break... { uText[i] = kUniUnixBreak;//...or was } } else if( uText[i] == kUnicodeBreak ) { uText[i] = kUniUnixBreak; } } } void CConverter::UniChangeToWindows(TextPtr outBuf, ByteCount &inLen, ByteCount buffSize) { #ifndef Debug_Signal #pragma unused (buffSize) #endif Assert_( (inLen*2) <= buffSize);//buffer must be long enough to hold the growing text //output text will be longer or equal //we look for // - PS - kUnicodeBreak // - CR - kUniMacBreak // - LF - kUniUnixBreak //but skip CRLF combination UniCharPtr uText = (UniCharPtr)outBuf; UInt32 uLen = inLen / sizeof(UniChar); for(UInt32 i = 0; i< uLen; i++) { if( uText[i] == kUniMacBreak ) {//something found if( uText[i+1] == kUniUnixBreak )//check for LF on next pos { //it is a windows break so skip it i++; } else {//it is a mac break //first char ok //shift data one char right ::BlockMoveData( &uText[i+1], &uText[i+2], (uLen - i - 1)*sizeof(UniChar) ); //and insert LF uText[i+1] = kUniUnixBreak; uLen++; i++; } } else if( (uText[i] == kUniUnixBreak) || (uText[i] == kUnicodeBreak) )//unix break or Unicode break { //write CR in place of LF uText[i] = kUniMacBreak; 
//shift data one char right ::BlockMoveData( &uText[i+1], &uText[i+2], (uLen - i - 1)*sizeof(UniChar) ); //and insert LF uText[i+1] = kUniUnixBreak; uLen++; i++; } } }