00001 #ifndef sort_with_temp_files_h
00002 #define sort_with_temp_files_h
00003
00004 #include "bserialize.h"
00005
00006
// Budget for one in-memory portion of SortWithFiles: the number of items
// sorted at once is this value divided by get_size_in_bytes() of one item.
const size_t SortBufferSize = 100 * 1000 * 1000;
00008
00009
00010
// Copies the not-yet-written tail of one input to out_fp.
//
// On entry V must already hold item number i of the input (when i < Max_i);
// the function writes it, then keeps reading the following items from in_fp
// into V and writing them until Max_i items have been written.
// Nothing is done when i >= Max_i.
//
// Returns false as soon as BinaryWriteItem or BinaryReadItem reports failure,
// true otherwise.
template <class T>
bool CopyRestOfBinaryFiles (FILE* in_fp, FILE* out_fp, size_t i, size_t Max_i, T& V)
{
    while (i < Max_i)
    {
        if (!BinaryWriteItem(out_fp, V))
            return false;

        ++i;

        // The item just written was the last one: there is nothing left to read.
        if (i == Max_i)
            break;

        if (!BinaryReadItem(in_fp, V))
            return false;
    }
    return true;
}
00026
00027
00028 template <class T>
00029 string UnionBinaryFiles (string InputFileName1, string InputFileName2)
00030 {
00031 string OutputFileName = CreateTempFileName();
00032 FILE* fp1 = 0;
00033 FILE* fp2 = 0;
00034 FILE* out_fp = 0;
00035 T V1, V2;
00036 size_t Count1 = (size_t)FileSize(InputFileName1.c_str())/get_size_in_bytes(V1);
00037 size_t Count2 = (size_t)FileSize(InputFileName2.c_str())/get_size_in_bytes(V2);
00038
00039 try {
00040 if (Count1 == 0)
00041 if (!RmlMoveFile (InputFileName2.c_str(), OutputFileName.c_str()))
00042 throw (Format( "Could not move %s to %s", InputFileName2.c_str(), OutputFileName.c_str()));
00043 else
00044 return OutputFileName;
00045 if (Count2 == 0)
00046 if (!RmlMoveFile (InputFileName1.c_str(), OutputFileName.c_str()))
00047 throw (Format( "Could not move %s to %s", InputFileName1.c_str(), OutputFileName.c_str()));
00048 else
00049 return OutputFileName;
00050
00051
00052
00053 fp1 = fopen (InputFileName1.c_str(), "rb");
00054 if (!fp1)
00055 throw CExpc ("cannot read %s\n", InputFileName1.c_str());
00056 fp2 = fopen (InputFileName2.c_str(), "rb");
00057 if (!fp2)
00058 throw CExpc ("cannot read %s\n", InputFileName2.c_str());
00059
00060
00061 if (!BinaryReadItem(fp1, V1))
00062 throw CExpc ("cannot read item form %s\n", InputFileName1.c_str());
00063
00064 if (!BinaryReadItem(fp2, V2))
00065 throw CExpc ("cannot read item form %s\n", InputFileName2.c_str());
00066
00067 size_t i1 = 0;
00068 size_t i2 = 0;
00069 string Error;
00070 out_fp = fopen (OutputFileName.c_str(), "wb");
00071
00072 for (; i1 != Count1 && i2 != Count2; )
00073 {
00074 if ( V1 < V2 )
00075 {
00076 if (!BinaryWriteItem(out_fp, V1))
00077 throw CExpc ("cannot write item to %s\n", OutputFileName.c_str());
00078
00079 i1++;
00080 if (i1 != Count1)
00081 if (!BinaryReadItem(fp1, V1))
00082 throw CExpc ("cannot read item form %s\n", InputFileName1.c_str());
00083 }
00084 else if (V2 < V1)
00085 {
00086 if (!BinaryWriteItem(out_fp, V2))
00087 throw CExpc ("cannot write item to %s\n", OutputFileName.c_str());
00088
00089 i2++;
00090 if (i2 != Count2)
00091 if (!BinaryReadItem(fp2, V2))
00092 throw CExpc ("cannot read item form %s\n", InputFileName2.c_str());
00093 }
00094 else
00095 {
00096
00097 if (!BinaryWriteItem(out_fp, V1))
00098 throw CExpc ("cannot write item to %s\n", OutputFileName.c_str());
00099
00100 if (!BinaryWriteItem(out_fp, V2))
00101 throw CExpc ("cannot write item to %s\n", OutputFileName.c_str());
00102
00103 i1++;
00104 if (i1 != Count1)
00105 if (!BinaryReadItem(fp1, V1))
00106 throw CExpc ("cannot read item form %s\n", InputFileName1.c_str());
00107
00108 i2++;
00109 if (i2 != Count2)
00110 if (!BinaryReadItem(fp2, V2))
00111 throw CExpc ("cannot read item form %s\n", InputFileName2.c_str());
00112 }
00113 }
00114 if (!CopyRestOfBinaryFiles<T>(fp1, out_fp, i1, Count1, V1) || !CopyRestOfBinaryFiles<T>(fp2, out_fp, i2, Count2, V2))
00115 throw CExpc("unexpected error in CopyBinaryFiles");
00116 }
00117 catch ( CExpc c)
00118 {
00119 fprintf (stderr, "Error in UnionBinaryFiles: %s", c.m_strCause.c_str());
00120 remove (OutputFileName.c_str());
00121 OutputFileName = "";
00122 }
00123
00124 if (fp1) fclose (fp1);
00125 if (fp2) fclose (fp2);
00126 if (out_fp) fclose (out_fp);
00127
00128 return OutputFileName;
00129
00130 }
00131
00132 template <class T>
00133 bool SortWithFiles (string InputFileName, string OutputFileName)
00134 {
00135 file_off_t sz = FileSize(InputFileName.c_str());
00136 T dummy;
00137 size_t Count = (size_t)sz/get_size_in_bytes(dummy);
00138 size_t PortionSize = max((size_t)1, SortBufferSize/get_size_in_bytes(dummy));
00139
00140 FILE* fp = fopen (InputFileName.c_str(), "rb");
00141 if (!fp)
00142 {
00143 fprintf (stderr, "Cannot open %s", InputFileName.c_str());
00144 return false;
00145 }
00146 vector<string> PortionNames;
00147 for (size_t i =0; i<Count; i+= PortionSize)
00148 {
00149 size_t End = min (Count, i+PortionSize);
00150 vector<T> Portion;
00151 ReadVectorInner(fp, Portion, End - i);
00152 sort (Portion.begin(), Portion.end());
00153 PortionNames.push_back(CreateTempFileName());
00154 WriteVector(PortionNames.back(), Portion);
00155 }
00156 fclose (fp);
00157
00158 while (PortionNames.size() > 1)
00159 {
00160 size_t SaveCount = PortionNames.size();
00161 vector<string> NewPortionNames;
00162 for (size_t i=0; i < PortionNames.size(); i+=2)
00163 {
00164 if (i+1 == PortionNames.size())
00165 NewPortionNames.insert(NewPortionNames.begin(), PortionNames.back());
00166 else
00167 {
00168 string UnitedFile = UnionBinaryFiles<T>(PortionNames[i], PortionNames[i+1]);
00169 if (UnitedFile.empty())
00170 return false;
00171
00172 NewPortionNames.push_back( UnitedFile );
00173 if (remove(PortionNames[i+1].c_str()) != 0)
00174 {
00175 fprintf (stderr, "Cannot remove temp file%s", PortionNames[i+1].c_str());
00176 return false;
00177 }
00178 if (remove(PortionNames[i].c_str()) !=0 )
00179 {
00180 fprintf (stderr, "Cannot remove temp file%s", PortionNames[i].c_str());
00181 return false;
00182 }
00183 }
00184 }
00185 NewPortionNames.swap(PortionNames);
00186 if (PortionNames.size() == SaveCount)
00187 throw CExpc("dead lock in SortWithFiles");
00188 SaveCount = PortionNames.size();
00189 }
00190 if (FileExists(OutputFileName.c_str()))
00191 remove (OutputFileName.c_str());
00192 return RmlMoveFile (PortionNames[0].c_str(), OutputFileName.c_str());
00193
00194
00195 }
00196
00197 template <class T>
00198 bool SortWithFiles (string InputFileName)
00199 {
00200 string TmpName = CreateTempFileName();
00201 if (!SortWithFiles<T>(InputFileName, TmpName))
00202 return false;
00203
00204 remove (InputFileName.c_str());
00205
00206 if (!RmlMoveFile (TmpName.c_str(), InputFileName.c_str()))
00207 {
00208 fprintf (stderr, "cannot move %s to %s\n",TmpName.c_str(), InputFileName.c_str());
00209 remove (TmpName.c_str());
00210 return false;
00211 }
00212 return true;
00213 }
00214
00215
00216 #endif