今回はopen type fontのフォントファイルから変換表を取り出し、利用する処理を実装します。
これまではttfdump.exeでcmap_msgothic.txtを手動作成していました。
この処理を自動化します。
取り出す変換表はplatformID == 3 && encodingID == 1のもののみに対応します。
open type fontの仕様を見ると、
https://www.microsoft.com/typography/otspec/otff.htm
ファイル先頭に4バイトのsfnt version、続いて2バイトのnumTables(テーブル数)、2バイトのsearchRange、2バイトのentrySelector、2バイトのrangeShiftが格納されており、
続いてnumTablesの分だけ「テーブル」が並びます。
1つのテーブルはtag、checkSum、offset、lengthがそれぞれ4バイト、合計16バイト分あり、
tagが"cmap"のテーブルの(ファイル先頭からの)offset位置に変換表が用意されています。
変換表の仕様を見ると、
https://www.microsoft.com/typography/otspec/cmap.htm
先頭から、2バイトのversion、2バイトのnumTables(テーブル数)があり、
続いてnumTablesの分だけ「テーブル」が並びます。
1つのテーブルはplatformIDとencodingIDがそれぞれ2バイト、offsetが4バイトの合計8バイト分あり、
(ファイル先頭ではなく"cmap"の先頭からの)offset位置に、platformIDとencodingIDの示す変換表が格納されています。
変換表の先頭には2バイトのformatがあり、このformatの値により変換表の格納形式が異なります。
今回利用する変換表はplatformIDが3、encodingIDが1のもので、通常はformat=4(Format 4: Segment mapping to delta values)の形式で格納されています(format値が4でない場合、今回の実装では変換表を読み込みません)。
format 4の変換表の中には様々な情報が格納されていますが、その中で以下の3つの配列が必要になります。
・startCount[]
・endCount[]
・idDelta[]
例えばユニコードで0x5511を変換する場合には、
startCount[n] <= 0x5511 <= endCount[n]
を満たす n の値を探し、以下のように変換します。
変換後の値=(0x5511 + idDelta[n]) mod 65536 (idDeltaは符号付き16ビット値で、加算は65536を法として行います)
今回はユニコードで0x0000~0xFFFFまですべての文字についての変換表が欲しいので、
すべての値において上記の計算を行って変換表を作成しています。
/// <summary>
/// Releases the per-document font bookkeeping by emptying the cmap table
/// registry, the list of recorded stream positions, and the font-type map.
/// </summary>
public void Close()
{
    _cmapFonts.Clear();
    _listnXref.Clear();
    _mapFontType.Clear();
}
/// <summary>
/// Writes the PDF objects that embed a font file as a composite (Type0) font.
///
/// Objects are written in this order, each taking one object number starting at
/// <paramref name="nObjIndex"/>: Type0 font, descendant CID font, font descriptor,
/// compressed font file, and (only when the font's cmap yielded at least one
/// mapping) the ToUnicode CMap.
/// </summary>
/// <param name="bw">Destination PDF writer.</param>
/// <param name="nObjIndex">Number of the first object to write; advanced past the last object written.</param>
/// <param name="strFontObjName">Key under which the cmap and the font type are registered; also used in the CMap name.</param>
/// <param name="strFont">Written as /BaseFont and /FontName.</param>
/// <param name="strFontFamily">Written as /FontFamily.</param>
/// <param name="strFontFile">Path of the font file to parse and embed.</param>
void WriteFont_EmbeddedUnicode(PDFRawWriter bw, ref int nObjIndex, string strFontObjName, string strFont, string strFontFamily, string strFontFile)
{
    // Parse the font's cmap before emitting anything: a missing/unreadable font file then
    // fails before any partial object is written, and we know up front whether a
    // ToUnicode object will exist (so the Type0 dictionary never references a missing one).
    ushort nRangeMin;
    ushort nRangeMax;
    IDictionary<ushort, byte[]> cmap = LoadCMap(strFontFile, out nRangeMin, out nRangeMax);
    _cmapFonts.Add(strFontObjName, cmap);
    bool bHasToUnicode = nRangeMin <= nRangeMax; // false only when no mapping was loaded

    // --- Type0 (composite) font dictionary ---
    _listnXref.Add(bw.BaseStream.Position);
    bw.WriteLine("" + nObjIndex + " 0 obj");
    bw.WriteLine("<</Type /Font");
    bw.WriteLine("/BaseFont /" + strFont);
    bw.WriteLine("/Subtype /Type0");
    bw.WriteLine("/Encoding /Identity-H"); // PDF-specific identity encoding
    bw.WriteLine("/DescendantFonts [" + (nObjIndex + 1) + " 0 R]");
    if (bHasToUnicode)
    {
        // The ToUnicode CMap is the 5th object of this group (see object order above).
        bw.WriteLine("/ToUnicode " + (nObjIndex + 4) + " 0 R");
    }
    bw.WriteLine(">>");
    bw.WriteLine("endobj");
    nObjIndex++;

    // --- Descendant CID font dictionary ---
    // NOTE(review): the subtype is CIDFontType0 while the descriptor below uses /FontFile2;
    // confirm against the PDF specification whether CIDFontType2 is intended for TrueType data.
    _listnXref.Add(bw.BaseStream.Position);
    bw.WriteLine("" + nObjIndex + " 0 obj");
    bw.WriteLine("<</Type /Font");
    bw.WriteLine("/Subtype /CIDFontType0");
    bw.WriteLine("/BaseFont /" + strFont);
    bw.WriteLine("/CIDSystemInfo <<");
    bw.WriteLine("/Registry (Adobe)");
    bw.WriteLine("/Ordering (Identity)"); // deliberately not Japan1
    bw.WriteLine("/Supplement 0");
    bw.WriteLine(">>");
    bw.WriteLine("/FontDescriptor " + (nObjIndex + 1) + " 0 R");
    bw.WriteLine(">>");
    bw.WriteLine("endobj");
    nObjIndex++;

    // --- Font descriptor ---
    // All metric values below are hard-coded; none of them is derived from the font file.
    _listnXref.Add(bw.BaseStream.Position);
    bw.WriteLine("" + nObjIndex + " 0 obj");
    bw.WriteLine("<</Type /FontDescriptor");
    bw.WriteLine("/FontName /" + strFont);
    bw.WriteLine("/FontFamily(" + strFontFamily + ")");
    bw.WriteLine("/FontFile2 " + (nObjIndex + 1) + " 0 R");
    bw.WriteLine("/Flags 6");
    bw.WriteLine("/FontBBox [0 0 0 0]"); // all zeros: leave the bounding box to the viewer
    bw.WriteLine("/ItalicAngle 0");
    bw.WriteLine("/Ascent 1317");
    bw.WriteLine("/Descent -349");
    bw.WriteLine("/CapHeight 742");
    bw.WriteLine("/StemV 80");
    bw.WriteLine(">>");
    bw.WriteLine("endobj");
    nObjIndex++;

    // --- Embedded font file (whole file, compressed) ---
    _listnXref.Add(bw.BaseStream.Position);
    byte[] fontData = File.ReadAllBytes(strFontFile);
    WriteZlibStreamObject(bw, nObjIndex, fontData, " /Length1 " + fontData.Length);
    nObjIndex++;

    // --- ToUnicode CMap (glyph index -> Unicode) ---
    if (bHasToUnicode)
    {
        byte[] cmapData = BuildToUnicodeCMap(strFontObjName, cmap, nRangeMin, nRangeMax);
        _listnXref.Add(bw.BaseStream.Position);
        WriteZlibStreamObject(bw, nObjIndex, cmapData, "");
        nObjIndex++;
    }

    // Remember that this font is an embedded one.
    _mapFontType.Add(strFontObjName, FONT_TYPE.EMBEDDED);
}

/// <summary>
/// Writes one "N 0 obj ... endobj" stream object whose payload is a two-byte zlib
/// header (0x78 0x9c) followed by the output of CompressDeflate.
/// </summary>
/// <param name="bw">Destination PDF writer.</param>
/// <param name="nObjIndex">Object number to write.</param>
/// <param name="data">Uncompressed payload.</param>
/// <param name="strExtraDictEntries">Text appended to the stream dictionary after /Length (may be empty).</param>
void WriteZlibStreamObject(PDFRawWriter bw, int nObjIndex, byte[] data, string strExtraDictEntries)
{
    // NOTE(review): only the zlib header is added here; whether CompressDeflate appends
    // the Adler-32 trailer a complete zlib stream carries cannot be seen from here - confirm.
    byte[] header = new byte[] { 0x78, 0x9c };
    byte[] compress = CompressDeflate(data);

    // /Length must cover every byte between "stream" and "endstream", i.e. the header too.
    long nEncodedLength = (long)header.Length + compress.Length;

    bw.WriteLine("" + nObjIndex + " 0 obj");
    bw.WriteLine("<</Filter /FlateDecode /Length " + nEncodedLength + strExtraDictEntries + ">>");
    bw.WriteLine("stream");
    bw.Write(header);
    bw.Write(compress);
    bw.Write0x0a();
    bw.WriteLine("endstream");
    bw.WriteLine("endobj");
}

/// <summary>
/// Builds the text of a ToUnicode CMap that maps every glyph index in
/// <paramref name="cmap"/> back to its character code.
/// </summary>
/// <param name="strFontObjName">Used to form the CMap registry and name.</param>
/// <param name="cmap">Character code -> two-byte big-endian glyph index.</param>
/// <param name="nRangeMin">Smallest glyph index, written as the start of the codespace range.</param>
/// <param name="nRangeMax">Largest glyph index, written as the end of the codespace range.</param>
/// <returns>The CMap text encoded as ASCII.</returns>
byte[] BuildToUnicodeCMap(string strFontObjName, IDictionary<ushort, byte[]> cmap, ushort nRangeMin, ushort nRangeMax)
{
    // The CMap format allows at most 100 entries per beginbfchar/endbfchar block.
    const int MaxEntriesPerBlock = 100;

    using (MemoryStream ms = new MemoryStream())
    using (StreamWriter writer = new StreamWriter(ms, Encoding.ASCII))
    {
        writer.Write("/CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo <<\r");
        writer.Write("/Registry (" + strFontObjName + "+0) /Ordering (T42UV) /Supplement 0 >> def\r");
        writer.Write("/CMapName /" + strFontObjName + "+0 def\r");
        writer.Write("/CMapType 2 def\r");
        writer.Write("1 begincodespacerange <" + ConvertToHex_BE(nRangeMin) + "> <" + ConvertToHex_BE(nRangeMax) + "> endcodespacerange\r");

        int nRemaining = cmap.Count;
        int nInBlock = 0;
        foreach (KeyValuePair<ushort, byte[]> pair in cmap)
        {
            if (nInBlock == 0)
            {
                writer.Write("" + Math.Min(MaxEntriesPerBlock, nRemaining) + " beginbfchar\r");
            }

            // Each entry is "<glyph index> <unicode>", both as 4 hex digits.
            string strGlyph = pair.Value[0].ToString("X2") + pair.Value[1].ToString("X2");
            writer.Write("<" + strGlyph + "> <" + ConvertToHex_BE(pair.Key) + ">\r");

            nInBlock++;
            nRemaining--;
            if (nInBlock == MaxEntriesPerBlock || nRemaining == 0)
            {
                writer.Write("endbfchar\r");
                nInBlock = 0;
            }
        }

        writer.Write("endcmap CMapName currentdict /CMap defineresource pop end end\r");
        writer.Flush();
        return ms.ToArray();
    }
}

/// <summary>
/// Formats a 16-bit value as four upper-case hexadecimal digits, most significant byte first.
/// </summary>
/// <param name="value">Value to format.</param>
/// <returns>Four hex digits, e.g. "30D6".</returns>
string ConvertToHex_BE(ushort value)
{
    // Formatting the number directly does not depend on the byte order of the host CPU.
    return value.ToString("X4");
}

/// <summary>
/// Reads the Unicode character-to-glyph table from an OpenType/TrueType font file.
///
/// Only the cmap subtable with platformID 3 / encodingID 1 stored in format 4 is
/// supported; for any other font the returned dictionary is empty. The file is read
/// as the specification lays it out, without validating the sfnt version, magic
/// numbers or checksums.
///
/// Specification: https://www.microsoft.com/typography/otspec/otff.htm
/// </summary>
/// <param name="strFontFile">Path of the font file.</param>
/// <param name="nRangeMin">Smallest glyph index found (0xFFFF when nothing was loaded).</param>
/// <param name="nRangeMax">Largest glyph index found (0 when nothing was loaded).</param>
/// <returns>Character code -> glyph index as two bytes, most significant byte first.</returns>
IDictionary<ushort, byte[]> LoadCMap(string strFontFile, out ushort nRangeMin, out ushort nRangeMax)
{
    IDictionary<ushort, byte[]> cmap = new Dictionary<ushort, byte[]>();
    nRangeMin = 0xFFFF;
    nRangeMax = 0;

    using (FileStream fs = new FileStream(strFontFile, FileMode.Open, FileAccess.Read))
    using (BinaryReader br = new BinaryReader(fs))
    {
        // Offset table: sfnt version (4), numTables (2), searchRange/entrySelector/rangeShift (2 each).
        br.ReadBytes(4);
        uint nTableCount = ByteToUInt_BE(br.ReadBytes(2));
        br.ReadBytes(6);

        // Table directory: 16-byte records (tag, checkSum, offset, length).
        // Only "cmap" is consumed; no other table's contents are used by this method.
        uint nCMapOffset = 0;
        uint nCMapLength = 0;
        for (uint i = 0; i < nTableCount; i++)
        {
            string strTag = Encoding.ASCII.GetString(br.ReadBytes(4));
            br.ReadBytes(4); // checkSum (not verified)
            uint offset = ByteToUInt_BE(br.ReadBytes(4)); // from the beginning of the file
            uint length = ByteToUInt_BE(br.ReadBytes(4));
            if (strTag == "cmap")
            {
                nCMapOffset = offset;
                nCMapLength = length;
                break;
            }
        }
        if (nCMapOffset == 0 || nCMapLength == 0)
        {
            return cmap;
        }

        // cmap header: version (2), numTables (2), then 8-byte encoding records.
        // https://www.microsoft.com/typography/otspec/cmap.htm
        fs.Seek(nCMapOffset, SeekOrigin.Begin);
        br.ReadBytes(2);
        uint numTables = ByteToUInt_BE(br.ReadBytes(2));
        uint nCMap31Offset = 0;
        for (uint i = 0; i < numTables; i++)
        {
            uint platformID = ByteToUInt_BE(br.ReadBytes(2));
            uint encodingID = ByteToUInt_BE(br.ReadBytes(2));
            uint offset = ByteToUInt_BE(br.ReadBytes(4)); // from the beginning of the cmap table
            if (platformID == 3 && encodingID == 1) // Windows (3), Unicode BMP (1)
            {
                nCMap31Offset = offset;
                break;
            }
        }
        if (nCMap31Offset == 0)
        {
            return cmap;
        }

        fs.Seek((long)nCMapOffset + nCMap31Offset, SeekOrigin.Begin);
        uint format = ByteToUInt_BE(br.ReadBytes(2));
        if (format != 4) // Format 4: Segment mapping to delta values
        {
            return cmap;
        }

        br.ReadBytes(4); // length, language
        uint segCount = ByteToUInt_BE(br.ReadBytes(2)) / 2; // stored as segCountX2
        br.ReadBytes(6); // searchRange, entrySelector, rangeShift

        uint[] endCount = ReadUInt16Array_BE(br, segCount);
        br.ReadBytes(2); // reservedPad
        uint[] startCount = ReadUInt16Array_BE(br, segCount);
        int[] idDelta = new int[segCount];
        for (uint i = 0; i < segCount; i++)
        {
            idDelta[i] = ByteToInt_BE(br.ReadBytes(2)); // signed 16-bit
        }
        uint[] idRangeOffset = ReadUInt16Array_BE(br, segCount);

        // Everything from here to the end of the cmap table is treated as glyphIdArray,
        // which segments with a non-zero idRangeOffset index into.
        long nGlyphBytes = Math.Min((long)nCMapOffset + nCMapLength, fs.Length) - fs.Position;
        byte[] glyphIdArray = nGlyphBytes > 0 ? br.ReadBytes((int)nGlyphBytes) : new byte[0];

        for (uint i = 0; i < segCount; i++)
        {
            for (uint charCode = startCount[i]; charCode <= endCount[i]; charCode++)
            {
                if (charCode == 0xFFFF)
                {
                    continue; // terminator of the final segment, not a character
                }

                int index;
                if (idRangeOffset[i] == 0)
                {
                    // Direct mapping; idDelta arithmetic is modulo 65536.
                    index = (int)((charCode + idDelta[i]) & 0xFFFF);
                }
                else
                {
                    // idRangeOffset[i] is a byte offset measured from the position of
                    // idRangeOffset[i] itself; rebase it to the start of glyphIdArray,
                    // which begins (segCount - i) 16-bit words after that position.
                    long nPos = (long)idRangeOffset[i] + 2L * (charCode - startCount[i]) - 2L * (segCount - i);
                    if (nPos < 0 || nPos + 1 >= glyphIdArray.Length)
                    {
                        continue; // points outside the table: malformed entry, skip it
                    }
                    uint nGlyph = (uint)((glyphIdArray[nPos] << 8) | glyphIdArray[nPos + 1]);
                    if (nGlyph == 0)
                    {
                        continue; // 0 marks a missing glyph
                    }
                    index = (int)((nGlyph + idDelta[i]) & 0xFFFF);
                }

                ushort nKey = (ushort)charCode;
                if (cmap.ContainsKey(nKey))
                {
                    continue; // overlapping segments: keep the first mapping instead of throwing
                }

                // Store the glyph index most significant byte first, independent of host byte order.
                cmap.Add(nKey, new byte[] { (byte)(index >> 8), (byte)(index & 0xFF) });

                // Track the smallest and largest glyph index.
                if (index < nRangeMin)
                {
                    nRangeMin = (ushort)index;
                }
                if (index > nRangeMax)
                {
                    nRangeMax = (ushort)index;
                }
            }
        }
    }
    return cmap;
}

/// <summary>
/// Reads <paramref name="nCount"/> consecutive big-endian unsigned 16-bit values.
/// </summary>
/// <param name="br">Reader positioned at the first value.</param>
/// <param name="nCount">Number of values to read.</param>
/// <returns>The values in file order.</returns>
uint[] ReadUInt16Array_BE(BinaryReader br, uint nCount)
{
    uint[] values = new uint[nCount];
    for (uint i = 0; i < nCount; i++)
    {
        values[i] = ByteToUInt_BE(br.ReadBytes(2));
    }
    return values;
}

/// <summary>
/// Interprets a big-endian byte sequence as a signed integer.
/// Two bytes are sign-extended from 16 bits and four bytes are taken as a 32-bit value.
/// Null or empty input yields 0; any other length trips a debug assertion and is
/// returned as the unsigned interpretation cast to int.
/// </summary>
/// <param name="data">Bytes, most significant first.</param>
/// <returns>The decoded value.</returns>
int ByteToInt_BE(byte[] data)
{
    if (data == null || data.Length == 0)
    {
        return 0;
    }

    uint raw = ByteToUInt_BE(data);
    if (data.Length == 2)
    {
        return unchecked((short)raw);
    }
    if (data.Length == 4)
    {
        return unchecked((int)raw);
    }

    Debug.Assert(false);
    return unchecked((int)raw);
}

/// <summary>
/// Interprets a big-endian byte sequence as an unsigned integer.
/// Null or empty input yields 0.
/// </summary>
/// <param name="data">Bytes, most significant first.</param>
/// <returns>The decoded value.</returns>
uint ByteToUInt_BE(byte[] data)
{
    if (data == null || data.Length == 0)
    {
        return 0;
    }

    uint ret = 0;
    foreach (byte cb in data)
    {
        ret = (ret << 8) + cb;
    }
    return ret;
}