diff --git a/Cargo.toml b/Cargo.toml index 0a7542f..3fcb55d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,8 @@ alloc = [] full = [ "afrikaans", "albanian", + "assamese", + "bengali", "belarusian", "bulgarian", "catalan", @@ -33,28 +35,40 @@ full = [ "georgian", "german", "greek", + "gujarati", + "hindi", "hungarian", "icelandic", "italian", + "kannada", "kurmanji", "latin", "lithuanian", + "malayalam", + "marathi", "mongolian", "norwegian", + "oriya", + "panjabi", "polish", "portuguese", "russian", + "sanskrit", "serbian", "slovak", "slovenian", "spanish", "swedish", + "tamil", + "telugu", "turkish", "turkmen", "ukrainian", ] afrikaans = [] albanian = [] +assamese = [] +bengali = [] belarusian = [] bulgarian = [] catalan = [] @@ -70,22 +84,32 @@ galician = [] georgian = [] german = [] greek = [] +gujarati = [] +hindi = [] hungarian = [] icelandic = [] italian = [] +kannada = [] kurmanji = [] latin = [] lithuanian = [] +malayalam = [] +marathi = [] mongolian = [] norwegian = [] +oriya = [] +panjabi = [] polish = [] portuguese = [] russian = [] +sanskrit = [] serbian = [] slovak = [] slovenian = [] spanish = [] swedish = [] +tamil = [] +telugu = [] turkish = [] turkmen = [] ukrainian = [] diff --git a/README.md b/README.md index 8cfad81..a75f039 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ assert_eq!(syllables.join("-"), "ex-ten-sive"); ``` ## Languages -By default, this crate supports hyphenating more than 30 languages. Embedding +By default, this crate supports hyphenating 48 languages. Embedding automata for all these languages will add ~1.1 MiB to your binary. Alternatively, you can disable support for all languages and manually choose which ones get added: @@ -44,7 +44,9 @@ Each language added individually contributes: |------------|---------| | Afrikaans | 60 KiB | | Albanian | 1.4 KiB | +| Assamese | 209 B | | Belarusian | 3.9 KiB | +| Bengali | 209 B | | Bulgarian | 13 KiB | | Catalan | 1.7 KiB | | Croatian | 2.0 KiB | @@ -59,22 +61,32 @@ Each language added individually contributes: | Georgian | 11 KiB | | German | 201 KiB | | Greek | 2.0 KiB | +| Gujarati | 191 B | +| Hindi | 202 B | | Hungarian | 346 KiB | | Icelandic | 21 KiB | | Italian | 1.6 KiB | +| Kannada | 208 B | | Kurmanji | 1.4 KiB | | Latin | 1003 B | | Lithuanian | 6.5 KiB | +| Malayalam | 244 B | +| Marathi | 202 B | | Mongolian | 4.9 KiB | | Norwegian | 153 KiB | +| Oriya | 188 B | +| Panjabi | 176 B | | Polish | 16 KiB | | Portuguese | 1.0 KiB | | Russian | 33 KiB | +| Sanskrit | 2.0 KiB | | Serbian | 13 KiB | | Slovak | 13 KiB | | Slovenian | 5.5 KiB | | Spanish | 14 KiB | | Swedish | 24 KiB | +| Tamil | 178 B | +| Telugu | 206 B | | Turkish | 526 B | | Turkmen | 1.4 KiB | | Ukrainian | 21 KiB | diff --git a/patterns/hyph-as.tex b/patterns/hyph-as.tex new file mode 100644 index 0000000..2e16711 --- /dev/null +++ b/patterns/hyph-as.tex @@ -0,0 +1,142 @@ +% title: Hyphenation patterns for Assamese +% copyright: Copyright (C) 2016 Santhosh Thottingal +% notice: This file is part of the hyph-utf8 package. +% See http://www.hyphenation.org/tex for more information. +% source: https://github.com/santhoshtr/hyphenation/ +% language: +% name: Assamese +% tag: as +% version: 0.9.0 2016-01-16 +% authors: +% - +% name: Santhosh Thottingal +% contact: santhosh.thottingal (at) gmail.com +% licence: +% - This file is available under any of the following licences: +% - +% name: MIT +% url: https://opensource.org/licenses/MIT +% text: > +% Permission is hereby granted, free of charge, to any person +% obtaining a copy of this software and associated documentation +% files (the "Software"), to deal in the Software without +% restriction, including without limitation the rights to use, +% copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the +% Software is furnished to do so, subject to the following +% conditions: +% +% The above copyright notice and this permission notice shall be +% included in all copies or substantial portions of the Software. +% +% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +% OTHER DEALINGS IN THE SOFTWARE. +% - +% name: LGPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/lgpl.html +% - +% name: GPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/gpl.html +% hyphenmins: +% generation: +% left: 1 +% right: 1 +% texlive: +% babelname: assamese +% message: Assamese hyphenation patterns +% package: indic +% +\patterns{ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER (U+200D) +2‍2 +% Break on both sides of ZERO-WIDTH NON JOINER (U+200C) +1‌1 +% Break before or after any independent vowel. +অ1 +আ1 +ই1 +ঈ1 +উ1 +ঊ1 +ঋ1 +ৠ1 +ঌ1 +ৡ1 +এ1 +ঐ1 +ও1 +ঔ1 +% Break after any dependent vowel, but not before. +া1 +ি1 +ী1 +ু1 +ূ1 +ৃ1 +ৄ1 +ৢ1 +ৣ1 +ে1 +ৈ1 +ো1 +ৌ1 +2়2 +ৗ1 +% Break before or after any consonant. +1ক +1খ +1গ +1ঘ +1ঙ +1চ +1ছ +1জ +1ঝ +1ঞ +1ট +1ঠ +1ড +1ড় +1ঢ +1ঢ় +1ণ +1ত +1থ +1দ +1ধ +1ন +1প +1ফ +1ব +1ভ +1ম +1য +1য় +1র +1ল +1শ +1ষ +1স +1হ +% Do not break after khanda ta. +ৎ1 +% Do not break before chandrabindu, anusvara, visarga, avagraha, +% nukta and au length mark. +2ঃ1 +2ং1 +2ঁ1 +2ঽ1 +% Do not break either side of virama (may be within conjunct). +2্2 +} diff --git a/patterns/hyph-bn.tex b/patterns/hyph-bn.tex new file mode 100644 index 0000000..985a4eb --- /dev/null +++ b/patterns/hyph-bn.tex @@ -0,0 +1,142 @@ +% title: Hyphenation patterns for Bengali +% copyright: Copyright (C) 2016 Santhosh Thottingal +% notice: This file is part of the hyph-utf8 package. +% See http://www.hyphenation.org/tex for more information. +% source: https://github.com/santhoshtr/hyphenation/ +% language: +% name: Bengali +% tag: bn +% version: 0.9.0 2016-01-16 +% authors: +% - +% name: Santhosh Thottingal +% contact: santhosh.thottingal (at) gmail.com +% licence: +% - This file is available under any of the following licences: +% - +% name: MIT +% url: https://opensource.org/licenses/MIT +% text: > +% Permission is hereby granted, free of charge, to any person +% obtaining a copy of this software and associated documentation +% files (the "Software"), to deal in the Software without +% restriction, including without limitation the rights to use, +% copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the +% Software is furnished to do so, subject to the following +% conditions: +% +% The above copyright notice and this permission notice shall be +% included in all copies or substantial portions of the Software. +% +% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +% OTHER DEALINGS IN THE SOFTWARE. +% - +% name: LGPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/lgpl.html +% - +% name: GPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/gpl.html +% hyphenmins: +% generation: +% left: 1 +% right: 1 +% texlive: +% babelname: bengali +% message: Bengali hyphenation patterns +% package: indic +% +\patterns{ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER (U+200D) +2‍2 +% Break on both sides of ZERO-WIDTH NON JOINER (U+200C) +1‌1 +% Break before or after any independent vowel. +অ1 +আ1 +ই1 +ঈ1 +উ1 +ঊ1 +ঋ1 +ৠ1 +ঌ1 +ৡ1 +এ1 +ঐ1 +ও1 +ঔ1 +% Break after any dependent vowel, but not before. +া1 +ি1 +ী1 +ু1 +ূ1 +ৃ1 +ৄ1 +ৢ1 +ৣ1 +ে1 +ৈ1 +ো1 +ৌ1 +2়2 +ৗ1 +% Break before or after any consonant. +1ক +1খ +1গ +1ঘ +1ঙ +1চ +1ছ +1জ +1ঝ +1ঞ +1ট +1ঠ +1ড +1ড় +1ঢ +1ঢ় +1ণ +1ত +1থ +1দ +1ধ +1ন +1প +1ফ +1ব +1ভ +1ম +1য +1য় +1র +1ল +1শ +1ষ +1স +1হ +% Do not break after khanda ta. +ৎ1 +% Do not break before chandrabindu, anusvara, visarga, avagraha, +% nukta and au length mark. +2ঃ1 +2ং1 +2ঁ1 +2ঽ1 +% Do not break either side of virama (may be within conjunct). +2্2 +} diff --git a/patterns/hyph-gu.tex b/patterns/hyph-gu.tex new file mode 100644 index 0000000..df8f0d0 --- /dev/null +++ b/patterns/hyph-gu.tex @@ -0,0 +1,135 @@ +% title: Hyphenation patterns for Gujarati +% copyright: Copyright (C) 2016 Santhosh Thottingal +% notice: This file is part of the hyph-utf8 package. +% See http://www.hyphenation.org/tex for more information. +% source: https://github.com/santhoshtr/hyphenation/ +% language: +% name: Gujarati +% tag: gu +% version: 0.9.0 2016-01-16 +% authors: +% - +% name: Santhosh Thottingal +% contact: santhosh.thottingal (at) gmail.com +% licence: +% - This file is available under any of the following licences: +% - +% name: MIT +% url: https://opensource.org/licenses/MIT +% text: > +% Permission is hereby granted, free of charge, to any person +% obtaining a copy of this software and associated documentation +% files (the "Software"), to deal in the Software without +% restriction, including without limitation the rights to use, +% copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the +% Software is furnished to do so, subject to the following +% conditions: +% +% The above copyright notice and this permission notice shall be +% included in all copies or substantial portions of the Software. +% +% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +% OTHER DEALINGS IN THE SOFTWARE. +% - +% name: LGPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/lgpl.html +% - +% name: GPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/gpl.html +% hyphenmins: +% generation: +% left: 1 +% right: 1 +% texlive: +% babelname: gujarati +% message: Gujarati hyphenation patterns +% package: indic +% +\patterns{ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER (U+200D) +2‍2 +% Break on both sides of ZERO-WIDTH NON JOINER (U+200C) +1‌1 +% Break before or after any independent vowel. +અ1 +આ1 +ઇ1 +ઈ1 +ઉ1 +ઊ1 +ઋ1 +ૠ1 +એ1 +ઐ1 +ઓ1 +ઔ1 +% Break after any dependent vowel but not before. +ા1 +િ1 +ી1 +ુ1 +ૂ1 +ૃ1 +ૄ1 +ૢ1 +ૣ1 +ે1 +ૈ1 +ો1 +ૌ1 +% Break before or after any consonant. +1ક +1ખ +1ગ +1ઘ +1ઙ +1ચ +1છ +1જ +1ઝ +1ઞ +1ટ +1ઠ +1ડ +1ઢ +1ણ +1ત +1થ +1દ +1ધ +1ન +1પ +1ફ +1બ +1ભ +1મ +1ય +1ર +1લ +1ળ +1વ +1શ +1ષ +1સ +1હ +% Do not break before chandrabindu, anusvara, visarga, avagraha +% and accents. +2ઁ1 +2ઃ1 +2ઽ1 +% Do not break either side of virama (may be within conjunct). +2્2 +2ં2 +} diff --git a/patterns/hyph-hi.tex b/patterns/hyph-hi.tex new file mode 100644 index 0000000..435f434 --- /dev/null +++ b/patterns/hyph-hi.tex @@ -0,0 +1,139 @@ +% title: Hyphenation patterns for Hindi +% copyright: Copyright (C) 2016 Santhosh Thottingal +% notice: This file is part of the hyph-utf8 package. +% See http://www.hyphenation.org/tex for more information. +% source: https://github.com/santhoshtr/hyphenation/ +% language: +% name: Hindi +% tag: hi +% version: 0.9.0 2016-01-16 +% authors: +% - +% name: Santhosh Thottingal +% contact: santhosh.thottingal (at) gmail.com +% licence: +% - This file is available under any of the following licences: +% - +% name: MIT +% url: https://opensource.org/licenses/MIT +% text: > +% Permission is hereby granted, free of charge, to any person +% obtaining a copy of this software and associated documentation +% files (the "Software"), to deal in the Software without +% restriction, including without limitation the rights to use, +% copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the +% Software is furnished to do so, subject to the following +% conditions: +% +% The above copyright notice and this permission notice shall be +% included in all copies or substantial portions of the Software. +% +% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +% OTHER DEALINGS IN THE SOFTWARE. +% - +% name: LGPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/lgpl.html +% - +% name: GPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/gpl.html +% hyphenmins: +% generation: +% left: 1 +% right: 1 +% texlive: +% babelname: hindi +% message: Hindi hyphenation patterns +% package: indic +% +\patterns{ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER (U+200D) +2‍2 +% Break on both sides of ZERO-WIDTH NON JOINER (U+200C) +1‌1 +% Break before or after any independent vowel. +अ1 +आ1 +इ1 +ई1 +उ1 +ऊ1 +ऋ1 +ॠ1 +ऌ1 +ॡ1 +ए1 +ऐ1 +ओ1 +औ1 +% Break after any dependent vowel but not before. +ा1 +ि1 +ी1 +ु1 +ू1 +ृ1 +ॄ1 +ॢ1 +ॣ1 +े1 +ै1 +ो1 +ौ1 +% Break before or after any consonant. +1क +1ख +1ग +1घ +1ङ +1च +1छ +1ज +1झ +1ञ +1ट +1ठ +1ड +1ढ +1ण +1त +1थ +1द +1ध +1न +1प +1फ +1ब +1भ +1म +1य +1र +1ल +1ळ +1व +1श +1ष +1स +1ह +% Do not break before chandrabindu, anusvara, visarga, avagraha +% and accents. +2ँ1 +2ं1 +2ः1 +2ऽ1 +2॑1 +2॒1 +% Do not break either side of virama (may be within conjunct). +2्2 +} diff --git a/patterns/hyph-kn.tex b/patterns/hyph-kn.tex new file mode 100644 index 0000000..5b2ba98 --- /dev/null +++ b/patterns/hyph-kn.tex @@ -0,0 +1,142 @@ +% title: Hyphenation patterns for Kannada +% copyright: Copyright (C) 2016 Santhosh Thottingal +% notice: This file is part of the hyph-utf8 package. +% See http://www.hyphenation.org/tex for more information. +% source: https://github.com/santhoshtr/hyphenation/ +% language: +% name: Kannada +% tag: kn +% version: 0.9.0 2016-01-16 +% authors: +% - +% name: Santhosh Thottingal +% contact: santhosh.thottingal (at) gmail.com +% licence: +% - This file is available under any of the following licences: +% - +% name: MIT +% url: https://opensource.org/licenses/MIT +% text: > +% Permission is hereby granted, free of charge, to any person +% obtaining a copy of this software and associated documentation +% files (the "Software"), to deal in the Software without +% restriction, including without limitation the rights to use, +% copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the +% Software is furnished to do so, subject to the following +% conditions: +% +% The above copyright notice and this permission notice shall be +% included in all copies or substantial portions of the Software. +% +% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +% OTHER DEALINGS IN THE SOFTWARE. +% - +% name: LGPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/lgpl.html +% - +% name: GPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/gpl.html +% hyphenmins: +% generation: +% left: 1 +% right: 1 +% texlive: +% babelname: kannada +% message: Kannada hyphenation patterns +% package: indic +% +\patterns{ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER (U+200D) +2‍2 +% Break on both sides of ZERO-WIDTH NON JOINER (U+200C) +1‌1 +% Break before or after any independent vowel. +ಅ1 +ಆ1 +ಇ1 +ಈ1 +ಉ1 +ಊ1 +ಋ1 +ೠ1 +ಌ1 +ೡ1 +ಎ1 +ಏ1 +ಐ1 +ಒ1 +ಓ1 +ಔ1 +% Break after any dependent vowel, but not before. +ಾ1 +ಿ1 +ೀ1 +ು1 +ೂ1 +ೃ1 +ೄ1 +ೆ1 +ೇ1 +ೈ1 +ೊ1 +ೋ1 +ೌ1 +% Break before or after any consonant. +1ಕ +1ಖ +1ಗ +1ಘ +1ಙ +1ಚ +1ಛ +1ಜ +1ಝ +1ಞ +1ಟ +1ಠ +1ಡ +1ಢ +1ಣ +1ತ +1ಥ +1ದ +1ಧ +1ನ +1ಪ +1ಫ +1ಬ +1ಭ +1ಮ +1ಯ +1ರ +1ಱ +1ಲ +1ಳ +1ೞ +1ವ +1ಶ +1ಷ +1ಸ +1ಹ +% Do not break before anusvara, visarga, avagraha, +% length mark and ai length mark. +2ಂ1 +2ಃ1 +2ಽ1 +2ೕ1 +2ೖ1 +% Do not break either side of virama (may be within conjunct). +2್2 +} diff --git a/patterns/hyph-ml.tex b/patterns/hyph-ml.tex new file mode 100644 index 0000000..b344995 --- /dev/null +++ b/patterns/hyph-ml.tex @@ -0,0 +1,157 @@ +% title: Hyphenation patterns for Malayalam +% copyright: Copyright (C) 2016 Santhosh Thottingal +% notice: This file is part of the hyph-utf8 package. +% See http://www.hyphenation.org/tex for more information. +% source: https://github.com/santhoshtr/hyphenation/ +% language: +% name: Malayalam +% tag: ml +% version: 0.9.0 2016-01-16 +% authors: +% - +% name: Santhosh Thottingal +% contact: santhosh.thottingal (at) gmail.com +% licence: +% - This file is available under any of the following licences: +% - +% name: MIT +% url: https://opensource.org/licenses/MIT +% text: > +% Permission is hereby granted, free of charge, to any person +% obtaining a copy of this software and associated documentation +% files (the "Software"), to deal in the Software without +% restriction, including without limitation the rights to use, +% copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the +% Software is furnished to do so, subject to the following +% conditions: +% +% The above copyright notice and this permission notice shall be +% included in all copies or substantial portions of the Software. +% +% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +% OTHER DEALINGS IN THE SOFTWARE. +% - +% name: LGPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/lgpl.html +% - +% name: GPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/gpl.html +% hyphenmins: +% generation: +% left: 1 +% right: 1 +% texlive: +% babelname: malayalam +% message: Malayalam hyphenation patterns +% package: indic +% +\patterns{ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER (U+200D) +2‍2 +% Break on both sides of ZERO-WIDTH NON JOINER (U+200C) +1‌1 +% Break before or after any independent vowel. +1അ1 +1ആ1 +1ഇ1 +1ഈ1 +1ഉ1 +1ഊ1 +1ഋ1 +1ൠ1 +1ഌ1 +1ൡ1 +1എ1 +1ഏ1 +1ഐ1 +1ഒ1 +1ഓ1 +1ഔ1 +% Break after any dependent vowel, but not before. +ാ1 +ി1 +ീ1 +ു1 +ൂ1 +ൃ1 +െ1 +േ1 +ൈ1 +ൊ1 +ോ1 +ൌ1 +ൗ1 +% Break before or after any consonant. +1ക +1ഖ +1ഗ +1ഘ +1ങ +1ച +1ഛ +1ജ +1ഝ +1ഞ +1ട +1ഠ +1ഡ +1ഢ +1ണ +1ത +1ഥ +1ദ +1ധ +1ന +1പ +1ഫ +1ബ +1ഭ +1മ +1യ +1ര +1റ +1ല +1ള +1ഴ +1വ +1ശ +1ഷ +1സ +1ഹ +% Do not break before anusvara, visarga +2ഃ1 +2ം1 +% Do not break either side of virama (may be within conjunct). +2്2 +% Do not break left side of chillu +ന്2 +ര്2 +ള്2 +ല്2 +ക്2 +ണ്2 +2ന്‍ +2ല്‍ +2ള്‍ +2ണ്‍ +2ര്‍ +2ക്‍ +2ൺ +2ൻ +2ർ +2ൽ +2ൾ +2ൿ +} diff --git a/patterns/hyph-mr.tex b/patterns/hyph-mr.tex new file mode 100644 index 0000000..8f3c567 --- /dev/null +++ b/patterns/hyph-mr.tex @@ -0,0 +1,139 @@ +% title: Hyphenation patterns for Marathi +% copyright: Copyright (C) 2016 Santhosh Thottingal +% notice: This file is part of the hyph-utf8 package. +% See http://www.hyphenation.org/tex for more information. +% source: https://github.com/santhoshtr/hyphenation/ +% language: +% name: Marathi +% tag: mr +% version: 0.9.0 2016-01-16 +% authors: +% - +% name: Santhosh Thottingal +% contact: santhosh.thottingal (at) gmail.com +% licence: +% - This file is available under any of the following licences, at your option: +% - +% name: MIT +% url: https://opensource.org/licenses/MIT +% text: > +% Permission is hereby granted, free of charge, to any person +% obtaining a copy of this software and associated documentation +% files (the "Software"), to deal in the Software without +% restriction, including without limitation the rights to use, +% copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the +% Software is furnished to do so, subject to the following +% conditions: +% +% The above copyright notice and this permission notice shall be +% included in all copies or substantial portions of the Software. +% +% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +% OTHER DEALINGS IN THE SOFTWARE. +% - +% name: LGPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/lgpl.html +% - +% name: GPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/gpl.html +% hyphenmins: +% generation: +% left: 1 +% right: 1 +% texlive: +% babelname: marathi +% message: Marathi hyphenation patterns +% package: indic +% +\patterns{ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER (U+200D) +2‍2 +% Break on both sides of ZERO-WIDTH NON JOINER (U+200C) +1‌1 +% Break before or after any independent vowel. +अ1 +आ1 +इ1 +ई1 +उ1 +ऊ1 +ऋ1 +ॠ1 +ऌ1 +ॡ1 +ए1 +ऐ1 +ओ1 +औ1 +% Break after any dependent vowel but not before. +ा1 +ि1 +ी1 +ु1 +ू1 +ृ1 +ॄ1 +ॢ1 +ॣ1 +े1 +ै1 +ो1 +ौ1 +% Break before or after any consonant. +1क +1ख +1ग +1घ +1ङ +1च +1छ +1ज +1झ +1ञ +1ट +1ठ +1ड +1ढ +1ण +1त +1थ +1द +1ध +1न +1प +1फ +1ब +1भ +1म +1य +1र +1ल +1ळ +1व +1श +1ष +1स +1ह +% Do not break before chandrabindu, anusvara, visarga, avagraha +% and accents. +2ँ1 +2ं1 +2ः1 +2ऽ1 +2॑1 +2॒1 +% Do not break either side of virama (may be within conjunct). +2्2 +} diff --git a/patterns/hyph-or.tex b/patterns/hyph-or.tex new file mode 100644 index 0000000..a601e2a --- /dev/null +++ b/patterns/hyph-or.tex @@ -0,0 +1,133 @@ +% title: Hyphenation patterns for Oriya +% copyright: Copyright (C) 2016 Santhosh Thottingal +% notice: This file is part of the hyph-utf8 package. +% See http://www.hyphenation.org/tex for more information. +% source: https://github.com/santhoshtr/hyphenation/ +% language: +% name: Odia, Oriya +% tag: or +% version: 0.9.0 2016-01-16 +% authors: +% - +% name: Santhosh Thottingal +% contact: santhosh.thottingal (at) gmail.com +% licence: +% - This file is available under any of the following licences: +% - +% name: MIT +% url: https://opensource.org/licenses/MIT +% text: > +% Permission is hereby granted, free of charge, to any person +% obtaining a copy of this software and associated documentation +% files (the “Software”), to deal in the Software without +% restriction, including without limitation the rights to use, +% copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the +% Software is furnished to do so, subject to the following +% conditions: +% +% The above copyright notice and this permission notice shall be +% included in all copies or substantial portions of the Software. +% +% THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, +% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +% OTHER DEALINGS IN THE SOFTWARE. +% - +% name: LGPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/lgpl.html +% - +% name: GPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/gpl.html +% hyphenmins: +% generation: +% left: 1 +% right: 1 +% texlive: +% babelname: oriya +% message: Oriya hyphenation patterns +% package: indic +% +\patterns{ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER (U+200D) +2‍2 +% Break on both sides of ZERO-WIDTH NON JOINER (U+200C) +1‌1 +% Break before or after any independent vowel. +ଅ1 +ଆ1 +ଇ1 +ଈ1 +ଉ1 +ଊ1 +ଋ1 +ୠ1 +ଌ1 +ୡ1 +ଏ1 +ଐ1 +ଓ1 +ଔ1 +% Break after any dependent vowel, but not before. +ା1 +ି1 +ୀ1 +ୁ1 +ୂ1 +ୃ1 +େ1 +ୈ1 +ୋ1 +ୌ1 +% Break before or after any consonant. +1କ +1ଖ +1ଗ +1ଘ +1ଙ +1ଚ +1ଛ +1ଜ +1ଝ +1ଞ +1ଟ +1ଠ +1ଡ +1ଢ +1ଣ +1ତ +1ଥ +1ଦ +1ଧ +1ନ +1ପ +1ଫ +1ବ +1ଭ +1ମ +1ଯ +1ର +1ଲ +1ଳ +1ଵ +1ଶ +1ଷ +1ସ +1ହ +% Do not break before anusvara, visarga and length mark. +2ଂ1 +2ଃ1 +2ୗ1 +2ଁ1 +% Do not break either side of virama (may be within conjunct). +2୍2 +} diff --git a/patterns/hyph-pa.tex b/patterns/hyph-pa.tex new file mode 100644 index 0000000..5647638 --- /dev/null +++ b/patterns/hyph-pa.tex @@ -0,0 +1,129 @@ +% title: Hyphenation patterns for Panjabi +% copyright: Copyright (C) 2016 Santhosh Thottingal +% notice: This file is part of the hyph-utf8 package. +% See http://www.hyphenation.org/tex for more information. +% source: https://github.com/santhoshtr/hyphenation/ +% language: +% name: Panjabi, Punjabi +% tag: pa +% version: 0.9.0 2016-01-16 +% authors: +% - +% name: Santhosh Thottingal +% contact: santhosh.thottingal (at) gmail.com +% licence: +% - This file is available under any of the following licences: +% - +% name: MIT +% url: https://opensource.org/licenses/MIT +% text: > +% Permission is hereby granted, free of charge, to any person +% obtaining a copy of this software and associated documentation +% files (the “Software”), to deal in the Software without +% restriction, including without limitation the rights to use, +% copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the +% Software is furnished to do so, subject to the following +% conditions: +% +% The above copyright notice and this permission notice shall be +% included in all copies or substantial portions of the Software. +% +% THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, +% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +% OTHER DEALINGS IN THE SOFTWARE. +% - +% name: LGPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/lgpl.html +% - +% name: GPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/gpl.html +% hyphenmins: +% generation: +% left: 1 +% right: 1 +% texlive: +% babelname: panjabi +% message: Panjabi hyphenation patterns +% package: indic +% +\patterns{ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER (U+200D) +2‍2 +% Break on both sides of ZERO-WIDTH NON JOINER (U+200C) +1‌1 +% Break before or after any independent vowel. +ਅ1 +ਆ1 +ਇ1 +ਈ1 +ਉ1 +ਊ1 +ਏ1 +ਐ1 +ਓ1 +ਔ1 +% Break after any dependent vowel but not before. +ਾ1 +ਿ1 +ੀ1 +ੁ1 +ੂ1 +ੇ1 +ੈ1 +ੋ1 +ੌ1 +% Break before or after any consonant. +1ਕ +1ਖ +1ਗ +1ਘ +1ਙ +1ਚ +1ਛ +1ਜ +1ਝ +1ਞ +1ਟ +1ਠ +1ਡ +1ਢ +1ਣ +1ਤ +1ਥ +1ਦ +1ਧ +1ਨ +1ਪ +1ਫ +1ਬ +1ਭ +1ਮ +1ਯ +1ਰ +1ਲ +1ਲ਼ +1ਵ +1ਸ਼ +1ਸ +1ਹ +% Do not break before chandrabindu, anusvara, visarga, avagraha +% and accents. +2ਁ1 +2ਂ1 +2ਃ1 +% Do not break either side of virama (may be within conjunct). +2੍2 +2ੰ2 +2ੱ2 +} diff --git a/patterns/hyph-sa.tex b/patterns/hyph-sa.tex new file mode 100644 index 0000000..2caf985 --- /dev/null +++ b/patterns/hyph-sa.tex @@ -0,0 +1,899 @@ +% title: Hyphenation patterns for Sanskrit and Prakrit, Latin transliteration, +% and Devanāgari, Bengali, Gujarati, Kannada, Malayalam, and Telugu scripts +% copyright: Copyright (C) 2006-2011 Yves Codet +% notice: This file is part of the hyph-utf8 package. +% See http://www.hyphenation.org/tex for more information. +% language: +% name: Sanskrit +% tag: sa +% version: 0.6 +% authors: +% - +% name: Yves Codet +% contact: ycodet (at) club-internet.fr +% licence: +% text: You may freely use, copy, modify and/or distribute this file. +% hyphenmins: +% generation: +% left: 1 +% right: 1 +% typesetting: +% left: 1 +% right: 3 +% changes: +% - Created: April 1st, 2005 +% - First release: June 8th, 2006 +% - Revised: September 14th, 2011 +% texlive: +% babelname: sanskrit +% message: Sanskrit hyphenation patterns +% description: |- +% Hyphenation patterns for Sanskrit and Prakrit in transliteration, +% and in Devanagari, Bengali, Kannada, Malayalam and Telugu scripts +% for Unicode engines. +% ========================================== +% Created by Yves Codet. Special thanks to Jonathan Kew and Benjamin Varghese who provided very useful informations. +% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\patterns{ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER +% (U+200D) and ZERO-WIDTH NON-JOINER (U+200C) +% (Maybe this should be implemented/supported in engines already.) +2‍2 +2‌2 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% TRANSLITERATION +%%%%%%%%%%%%%%%%%%%% +% Decomposed characters +% Break after any vowel, except inside a diphthong. +a1 +ā1 +i1 +ï1 +ī1 +u1 +ü1 +ū1 +r̥1 +ṛ1 +r̥̄1 +ṝ1 +l̥1 +ḷ1 +l̥̄1 +ḹ1 +e1 +o1 +a2i1 +a2u1 +% Break between a and i or u in hiatus. +a3ï1 +a3ü1 +% Do not break before a final consonant or consonant cluster. +2k. +2kh. +2g. +2gh. +2ṅ. +2c. +2ch. +2j. +2jh. +2ñ. +2ṭ. +2ṭh. +2ḍ. +2ḍh. +2ṇ. +2t. +2th. +2d. +2dh. +2n. +2p. +2ph. +2b. +2bh. +2m. +2y. +2r. +2l. +2ḷ. +2v. +2ś. +2ṣ. +2s. +2h. +2rk. +2rg. +2rṭ. +2rḍ. +2rt. +2rd. +2rp. +2rb. +% Break after anusvara, anunasika, visarga, jihvamuliya and +% upadhmaniya, but not before. +2ṃ1 +2ṁ1 +2m̐1 +2ḥ1 +2ẖ1 +2ḫ1 +% Do not break before a breve below, a candrabindu, a macron, +% a macron below, an acute, a grave or a diaeresis; break after +% a macron below, an acute or a grave. +2̮ +2̐ +2̄ +2̱1 +2́1 +2̀1 +2̈ +%%%%%%%%%%%%%%%%%%%% +% Precomposed characters +% Break after any vowel, except inside a diphthong. +á1 +à1 +ā1 +í1 +ì1 +ï1 +ī1 +ú1 +ù1 +ü1 +ū1 +é1 +è1 +ó1 +ò1 +a2í1 +a2ì1 +a2ú1 +a2ù1 +% Break between a and i or u in hiatus. +a3ï1 +a3ü1 +% Do not break before a final consonant or consonant cluster. +2ṅ. +2ñ. +2ṭ. +2ṭh. +2ḍ. +2ḍh. +2ṇ. +2ḷ. +2ś. +2ṣ. +2rṭ. +2rḍ. +% Break after anusvara, visarga, jihvamuliya and upadhmaniya, +% but not before. +2ṃ1 +2ṁ1 +2ḥ1 +2ẖ1 +2ḫ1 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% DEVANAGARI SCRIPT +% Break before or after any independent vowel. +1अ1 +1आ1 +1इ1 +1ई1 +1उ1 +1ऊ1 +1ऋ1 +1ॠ1 +1ऌ1 +1ॡ1 +1ए1 +1ऐ1 +1ओ1 +1औ1 +% Break after any dependent vowel but not before. +2ा1 +2ि1 +2ी1 +2ु1 +2ू1 +2ृ1 +2ॄ1 +2ॢ1 +2ॣ1 +2े1 +2ै1 +2ो1 +2ौ1 +% Break before or after any consonant. +1क1 +1ख1 +1ग1 +1घ1 +1ङ1 +1च1 +1छ1 +1ज1 +1झ1 +1ञ1 +1ट1 +1ठ1 +1ड1 +1ढ1 +1ण1 +1त1 +1थ1 +1द1 +1ध1 +1न1 +1प1 +1फ1 +1ब1 +1भ1 +1म1 +1य1 +1र1 +1ल1 +1ळ1 +1व1 +1श1 +1ष1 +1स1 +1ह1 +% Do not break before a final consonant or conjunct. +2क्. +2ख्. +2ग्. +2घ्. +2ङ्. +2च्. +2छ्. +2ज्. +2झ्. +2ञ्. +2ट्. +2ठ्. +2ड्. +2ढ्. +2ण्. +2त्. +2थ्. +2द्. +2ध्. +2न्. +2प्. +2फ्. +2ब्. +2भ्. +2म्. +2य्. +2र्. +2ल्. +2ळ्. +2व्. +2श्. +2ष्. +2स्. +2ह्. +2र्क्. +2र्ट्. +2र्त्. +2र्प्. +% Do not break before chandrabindu, anusvara, visarga, avagraha +% and accents; do not break after avagraha. +2ँ +2ं +2ः +2ऽ2 +2॑ +2॒ +% Do not break either side of virama (may be within conjunct). +2्2 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% BENGALI SCRIPT +% Break before or after any independent vowel. +1অ1 +1আ1 +1ই1 +1ঈ1 +1উ1 +1ঊ1 +1ঋ1 +1ৠ1 +1ঌ1 +1ৡ1 +1এ1 +1ঐ1 +1ও1 +1ঔ1 +% Break after any dependent vowel, but not before. +2া1 +2ি1 +2ী1 +2ু1 +2ূ1 +2ৃ1 +2ৄ1 +2ৢ1 +2ৣ1 +2ে1 +2ৈ1 +2ো1 +2ৌ1 +% Break before or after any consonant. +1ক1 +1খ1 +1গ1 +1ঘ1 +1ঙ1 +1চ1 +1ছ1 +1জ1 +1ঝ1 +1ঞ1 +1ট1 +1ঠ1 +1ড1 +1ড়1 +1ঢ1 +1ঢ়1 +1ণ1 +1ত1 +1থ1 +1দ1 +1ধ1 +1ন1 +1প1 +1ফ1 +1ব1 +1ভ1 +1ম1 +1য1 +1য়1 +1র1 +1ল1 +1শ1 +1ষ1 +1স1 +1হ1 +% Do not break after khanda ta. +1ৎ2 +% Do not break before a final consonant or conjunct. +2ক্. +2খ্. +2গ্. +2ঘ্. +2ঙ্. +2চ্. +2ছ্. +2জ্. +2ঝ্. +2ঞ্. +2ট্. +2ঠ্. +2ড্. +2ড়্. +2ঢ্. +2ঢ়্. +2ণ্. +2ত্. +2থ্. +2দ্. +2ধ্. +2ন্. +2প্. +2ফ্. +2ব্. +2ভ্. +2ম্. +2য্. +2য়্. +2র্. +2ল্. +2শ্. +2ষ্. +2স্. +2হ্. +2র্ক. +2র্ট. +2র্ত. +2র্প. +% Do not break before chandrabindu, anusvara, visarga, avagraha, +% nukta and au length mark; do not break after avagraha. +2ঁ +2ং +2ঃ +2ঽ2 +2় +2ৗ +% Do not break either side of virama (may be within conjunct). +2্2 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% GUJARATI SCRIPT +% Break before or after any independent vowel. +1અ1 +1આ1 +1ઇ1 +1ઈ1 +1ઉ1 +1ઊ1 +1ઋ1 +1ૠ1 +1ઌ1 +1ૡ1 +1એ1 +1ઐ1 +1ઓ1 +1ઔ1 +% Break after any dependent vowel but not before. +2ા1 +2િ1 +2ી1 +2ુ1 +2ૂ1 +2ૃ1 +2ૄ1 +2ૢ1 +2ૣ1 +2ે1 +2ૈ1 +2ો1 +2ૌ1 +% Break before or after any consonant. +1ક1 +1ખ1 +1ગ1 +1ઘ1 +1ઙ1 +1ચ1 +1છ1 +1જ1 +1ઝ1 +1ઞ1 +1ટ1 +1ઠ1 +1ડ1 +1ઢ1 +1ણ1 +1ત1 +1થ1 +1દ1 +1ધ1 +1ન1 +1પ1 +1ફ1 +1બ1 +1ભ1 +1મ1 +1ય1 +1ર1 +1લ1 +1ળ1 +1વ1 +1શ1 +1ષ1 +1સ1 +1હ1 +% Do not break before a final consonant or conjunct. +2ક્. +2ખ્. +2ગ્. +2ઘ્. +2ઙ્. +2ચ્. +2છ્. +2જ્. +2ઝ્. +2ઞ્. +2ટ્. +2ઠ્. +2ડ્. +2ઢ્. +2ણ્. +2ત્. +2થ્. +2દ્. +2ધ્. +2ન્. +2પ્. +2ફ્. +2બ્. +2ભ્. +2મ્. +2ય્. +2ર્. +2લ્. +2ળ્. +2વ્. +2શ્. +2ષ્. +2સ્. +2હ્. +2ર્ક. +2ર્ટ. +2ર્ત. +2ર્પ. +% Do not break before chandrabindu, anusvara, visarga, avagraha; +% do not break after avagraha. +2ઁ +2ં +2ઃ +2ઽ2 +% Do not break either side of virama (may be within conjunct). +2્2 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% KANNADA SCRIPT +% Break before or after any independent vowel. +1ಅ1 +1ಆ1 +1ಇ1 +1ಈ1 +1ಉ1 +1ಊ1 +1ಋ1 +1ೠ1 +1ಌ1 +1ೡ1 +1ಎ1 +1ಏ1 +1ಐ1 +1ಒ1 +1ಓ1 +1ಔ1 +% Break after any dependent vowel, but not before. +2ಾ1 +2ಿ1 +2ೀ1 +2ು1 +2ೂ1 +2ೃ1 +2ೄ1 +2ೆ1 +2ೇ1 +2ೈ1 +2ೊ1 +2ೋ1 +2ೌ1 +% Break before or after any consonant. +1ಕ1 +1ಖ1 +1ಗ1 +1ಘ1 +1ಙ1 +1ಚ1 +1ಛ1 +1ಜ1 +1ಝ1 +1ಞ1 +1ಟ1 +1ಠ1 +1ಡ1 +1ಢ1 +1ಣ1 +1ತ1 +1ಥ1 +1ದ1 +1ಧ1 +1ನ1 +1ಪ1 +1ಫ1 +1ಬ1 +1ಭ1 +1ಮ1 +1ಯ1 +1ರ1 +1ಱ1 % can occur in Sanskrit? +1ಲ1 +1ಳ1 +1ೞ1 % can occur in Sanskrit? +1ವ1 +1ಶ1 +1ಷ1 +1ಸ1 +1ಹ1 +% Do not break before a final consonant or conjunct. +2ಕ್. +2ಖ್. +2ಗ್. +2ಘ್. +2ಙ್. +2ಚ್. +2ಛ್. +2ಜ್. +2ಝ್. +2ಞ್. +2ಟ್. +2ಠ್. +2ಡ್. +2ಢ್. +2ಣ್. +2ತ್. +2ಥ್. +2ದ್. +2ಧ್. +2ನ್. +2ಪ್. +2ಫ್. +2ಬ್. +2ಭ್. +2ಮ್. +2ಯ್. +2ರ್. +2ಱ್. % can occur in Sanskrit? +2ಲ್. +2ಳ್. +2ವ್. +2ಶ್. +2ಷ್. +2ಸ್. +2ಹ್. +2ರ್ಕ. +2ರ್ಟ. +2ರ್ತ. +2ರ್ಪ. +% Do not break before anusvara, visarga, jihvamuliya, +% upadhmaniya, avagraha, length mark and ai length mark; +% do not break after avagraha. +2ಂ +2ಃ +2ೱ +2ೲ +2ಽ2 +2ೕ +2ೖ +% Do not break either side of virama (may be within conjunct). +2್2 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% MALAYALAM SCRIPT +% Break before or after any independent vowel. +1അ1 +1ആ1 +1ഇ1 +1ഈ1 +1ഉ1 +1ഊ1 +1ഋ1 +1ൠ1 +1ഌ1 +1ൡ1 +1എ1 +1ഏ1 +1ഐ1 +1ഒ1 +1ഓ1 +1ഔ1 +% Break after any dependent vowel, but not before. +2ാ1 +2ി1 +2ീ1 +2ു1 +2ൂ1 +2ൃ1 +2െ1 +2േ1 +2ൈ1 +2ൊ1 +2ോ1 +2ൌ1 +% Break before or after any consonant. +1ക1 +1ഖ1 +1ഗ1 +1ഘ1 +1ങ1 +1ച1 +1ഛ1 +1ജ1 +1ഝ1 +1ഞ1 +1ട1 +1ഠ1 +1ഡ1 +1ഢ1 +1ണ1 +1ത1 +1ഥ1 +1ദ1 +1ധ1 +1ന1 +1പ1 +1ഫ1 +1ബ1 +1ഭ1 +1മ1 +1യ1 +1ര1 +1റ1 % can occur in Sanskrit? +1ല1 +1ള1 +1ഴ1 % can occur in Sanskrit? +1വ1 +1ശ1 +1ഷ1 +1സ1 +1ഹ1 +% Do not break before a final consonant or conjunct. +2ക്. +2ഖ്. +2ഗ്. +2ഘ്. +2ങ്. +2ച്. +2ഛ്. +2ജ്. +2ഝ്. +2ഞ്. +2ട്. +2ഠ്. +2ഡ്. +2ഢ്. +2ണ്. +2ത്. +2ഥ്. +2ദ്. +2ധ്. +2ന്. +2പ്. +2ഫ്. +2ബ്. +2ഭ്. +2മ്. +2യ്. +2ര്. +2റ്. % can occur in Sanskrit? +2ല്. +2ള്. +2ഴ്. % can occur in Sanskrit? +2വ്. +2ശ്. +2ഷ്. +2സ്. +2ഹ്. +2ര്ക. +2ര്ട. +2ര്ത. +2ര്പ. +% Do not break before a chillu. +%% Representation in Unicode 5.1: +2ൺ1 +2ൻ1 +2ർ1 +2ൽ1 +2ൾ1 +2ൿ1 +%% Representation in Unicode 5.0 and prior versions (i.e. NNA, NA, RA, LA, TA, LLA or KA + VIRAMA + ZWJ): +2ണ്‍1 +2ന്‍1 +2ര്‍1 +2ല്‍1 +2ത്‍1 +2ള്‍1 +2ക്‍1 +% Do not break before anusvara, visarga and length mark. +2ം +2ഃ +2ൗ +% Do not break either side of virama (may be within conjunct). +2്2 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% TELUGU SCRIPT +% Break before or after any independent vowel. +1అ1 +1ఆ1 +1ఇ1 +1ఈ1 +1ఉ1 +1ఊ1 +1ఋ1 +1ౠ1 +1ఌ1 +1ౡ1 +1ఎ1 +1ఏ1 +1ఐ1 +1ఒ1 +1ఓ1 +1ఔ1 +% Break after any dependent vowel, but not before. +2ా1 +2ి1 +2ీ1 +2ు1 +2ూ1 +2ృ1 +2ౄ1 +2ె1 +2ే1 +2ై1 +2ొ1 +2ో1 +2ౌ1 +% Break before or after any consonant. +1క1 +1ఖ1 +1గ1 +1ఘ1 +1ఙ1 +1చ1 +1ఛ1 +1జ1 +1ఝ1 +1ఞ1 +1ట1 +1ఠ1 +1డ1 +1ఢ1 +1ణ1 +1త1 +1థ1 +1ద1 +1ధ1 +1న1 +1ప1 +1ఫ1 +1బ1 +1భ1 +1మ1 +1య1 +1ర1 +1ఱ1 % can occur in Sanskrit? +1ల1 +1ళ1 +1వ1 +1శ1 +1ష1 +1స1 +1హ1 +% Do not break before a final consonant or conjunct. +2క్. +2ఖ్. +2గ్. +2ఘ్. +2ఙ్. +2చ్. +2ఛ్. +2జ్. +2ఝ్. +2ఞ్. +2ట్. +2ఠ్. +2డ్. +2ఢ్. +2ణ్. +2త్. +2థ్. +2ద్. +2ధ్. +2న్. +2ప్. +2ఫ్. +2బ్. +2భ్. +2మ్. +2య్. +2ర్. +2ఱ్. % can occur in Sanskrit? +2ల్. +2ళ్. +2వ్. +2శ్. +2ష్. +2స్. +2హ్. +2ర్క్. +2ర్ట్. +2ర్త్. +2ర్ప్. +% Do not break before chandrabindu, anusvara, visarga, +% length mark and ai length mark. +2ఁ +2ం +2ః +2ౕ +2ౖ +% Do not break either side of virama (may be within conjunct). +2్2 +} diff --git a/patterns/hyph-ta.tex b/patterns/hyph-ta.tex new file mode 100644 index 0000000..54f0d43 --- /dev/null +++ b/patterns/hyph-ta.tex @@ -0,0 +1,140 @@ +% title: Hyphenation patterns for Tamil +% copyright: Copyright (C) 2016 Santhosh Thottingal +% notice: This file is part of the hyph-utf8 package. +% See http://www.hyphenation.org/tex for more information. +% source: https://github.com/santhoshtr/hyphenation/ +% language: +% name: Tamil +% tag: ta +% version: 0.9.0 2016-01-16 +% authors: +% - +% name: Santhosh Thottingal +% contact: santhosh.thottingal (at) gmail.com +% licence: +% - This file is available under any of the following licences: +% - +% name: MIT +% url: https://opensource.org/licenses/MIT +% text: > +% Permission is hereby granted, free of charge, to any person +% obtaining a copy of this software and associated documentation +% files (the "Software"), to deal in the Software without +% restriction, including without limitation the rights to use, +% copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the +% Software is furnished to do so, subject to the following +% conditions: +% +% The above copyright notice and this permission notice shall be +% included in all copies or substantial portions of the Software. +% +% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +% OTHER DEALINGS IN THE SOFTWARE. +% - +% name: LGPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/lgpl.html +% - +% name: GPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/gpl.html +% hyphenmins: +% generation: +% left: 1 +% right: 1 +% texlive: +% babelname: tamil +% message: Tamil hyphenation patterns +% package: indic +% +\patterns{ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER (U+200D) +2‍2 +% Break on both sides of ZERO-WIDTH NON JOINER (U+200C) +1‌1 +% Break before or after any independent vowel. +1அ1 +1ஆ1 +1இ1 +1ஈ1 +1உ1 +1ஊ1 +1எ1 +1ஏ1 +1ஐ1 +1ஒ1 +1ஓ1 +1ஔ1 +% Break after any dependent vowel, but not before. +ா1 +ி1 +ீ1 +ு1 +ூ1 +ெ1 +ே1 +ை1 +ொ1 +ோ1 +ௌ1 +% Break before or after any consonant. +1க +1ங +1ச +1ஜ +1ஞ +1ட +1ண +1த +1ந +1ப +1ம +1ய +1ர +1ற +1ல +1ள +1ழ +1வ +1ஷ +1ஸ +1ஹ +% Do not break before any consonant + virama. +2க்1 +2ங்1 +2ச்1 +2ஞ்1 +2ட்1 +2ண்1 +2த்1 +2ன்1 +2ந்1 +2ப்1 +2ம்1 +2ய்1 +2ர்1 +2ற்1 +2ல்1 +2ள்1 +2ழ்1 +2வ்1 +2ஷ்1 +2ஸ்1 +2ஹ்1 +% Do not break before anusvara, visarga and length mark. +2ஂ1 +2ஃ1 +2ௗ1 +% Do not break before virama but break after virama. +2்1 +} diff --git a/patterns/hyph-te.tex b/patterns/hyph-te.tex new file mode 100644 index 0000000..e12228b --- /dev/null +++ b/patterns/hyph-te.tex @@ -0,0 +1,141 @@ +% title: Hyphenation patterns for Telugu +% copyright: Copyright (C) 2016 Santhosh Thottingal +% notice: This file is part of the hyph-utf8 package. +% See http://www.hyphenation.org/tex for more information. +% source: https://github.com/santhoshtr/hyphenation/ +% language: +% name: Telugu +% tag: te +% version: 0.9.0 2016-01-16 +% authors: +% - +% name: Santhosh Thottingal +% contact: santhosh.thottingal (at) gmail.com +% licence: +% - This file is available under any of the following licences: +% - +% name: MIT +% url: https://opensource.org/licenses/MIT +% text: > +% Permission is hereby granted, free of charge, to any person +% obtaining a copy of this software and associated documentation +% files (the "Software"), to deal in the Software without +% restriction, including without limitation the rights to use, +% copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the +% Software is furnished to do so, subject to the following +% conditions: +% +% The above copyright notice and this permission notice shall be +% included in all copies or substantial portions of the Software. +% +% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +% OTHER DEALINGS IN THE SOFTWARE. +% - +% name: LGPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/lgpl.html +% - +% name: GPL +% version: 3 +% or_later: true +% url: http://www.gnu.org/licenses/gpl.html +% hyphenmins: +% generation: +% left: 1 +% right: 1 +% texlive: +% babelname: telugu +% message: Telugu hyphenation patterns +% package: indic +% +\patterns{ +% GENERAL RULE +% Do not break either side of ZERO-WIDTH JOINER (U+200D) +2‍2 +% Break on both sides of ZERO-WIDTH NON JOINER (U+200C) +1‌1 +% Break before or after any independent vowel. +అ1 +ఆ1 +ఇ1 +ఈ1 +ఉ1 +ఊ1 +ఋ1 +ౠ1 +ఌ1 +ౡ1 +ఎ1 +ఏ1 +ఐ1 +ఒ1 +ఓ1 +ఔ1 +% Break after any dependent vowel, but not before. +ా1 +ి1 +ీ1 +ు1 +ూ1 +ృ1 +ౄ1 +ె1 +ే1 +ై1 +ొ1 +ో1 +ౌ1 +% Break before or after any consonant. +1క +1ఖ +1గ +1ఘ +1ఙ +1చ +1ఛ +1జ +1ఝ +1ఞ +1ట +1ఠ +1డ +1ఢ +1ణ +1త +1థ +1ద +1ధ +1న +1ప +1ఫ +1బ +1భ +1మ +1య +1ర +1ఱ +1ల +1ళ +1వ +1శ +1ష +1స +1హ +% Do not break before chandrabindu, anusvara, visarga, +% length mark and ai length mark. +2ఁ1 +2ం1 +2ః1 +2ౕ1 +2ౖ1 +% Do not break either side of virama (may be within conjunct). +2్2 +} diff --git a/src/lang.rs b/src/lang.rs index b9d59a2..5568ace 100644 --- a/src/lang.rs +++ b/src/lang.rs @@ -15,9 +15,15 @@ pub enum Lang { /// Hyphenation for _Albanian._ (Code: `sq`, Script, `Latn`, Feature: `albanian`) #[cfg(feature = "albanian")] Albanian, + /// Hyphenation for _Assamese._ (Code: `as`, Script, `Beng`, Feature: `assamese`) + #[cfg(feature = "assamese")] + Assamese, /// Hyphenation for _Belarusian._ (Code: `be`, Script, `Cyrl`, Feature: `belarusian`) #[cfg(feature = "belarusian")] Belarusian, + /// Hyphenation for _Bengali._ (Code: `bn`, Script, `Beng`, Feature: `bengali`) + #[cfg(feature = "bengali")] + Bengali, /// Hyphenation for _Bulgarian._ (Code: `bg`, Script, `Cyrl`, Feature: `bulgarian`) #[cfg(feature = "bulgarian")] Bulgarian, @@ -60,6 +66,12 @@ pub enum Lang { /// Hyphenation for _Greek._ (Code: `el`, Script, `Grek`, Feature: `greek`) #[cfg(feature = "greek")] Greek, + /// Hyphenation for _Gujarati._ (Code: `gu`, Script, `Gujr`, Feature: `gujarati`) + #[cfg(feature = "gujarati")] + Gujarati, + /// Hyphenation for _Hindi._ (Code: `hi`, Script, `Deva`, Feature: `hindi`) + #[cfg(feature = "hindi")] + Hindi, /// Hyphenation for _Hungarian._ (Code: `hu`, Script, `Latn`, Feature: `hungarian`) #[cfg(feature = "hungarian")] Hungarian, @@ -69,6 +81,9 @@ pub enum Lang { /// Hyphenation for _Italian._ (Code: `it`, Script, `Latn`, Feature: `italian`) #[cfg(feature = "italian")] Italian, + /// Hyphenation for _Kannada._ (Code: `kn`, Script, `Knda`, Feature: `kannada`) + #[cfg(feature = "kannada")] + Kannada, /// Hyphenation for _Kurmanji._ (Code: `ku`, Script, `Latn`, Feature: `kurmanji`) #[cfg(feature = "kurmanji")] Kurmanji, @@ -78,12 +93,24 @@ pub enum Lang { /// Hyphenation for _Lithuanian._ (Code: `lt`, Script, `Latn`, Feature: `lithuanian`) #[cfg(feature = "lithuanian")] Lithuanian, + /// Hyphenation for _Malayalam._ (Code: `ml`, Script, `Mlym`, Feature: `malayalam`) + #[cfg(feature = "malayalam")] + Malayalam, + /// Hyphenation for _Marathi._ (Code: `mr`, Script, `Deva`, Feature: `marathi`) + #[cfg(feature = "marathi")] + Marathi, /// Hyphenation for _Mongolian._ (Code: `mn`, Script, `Cyrl`, Feature: `mongolian`) #[cfg(feature = "mongolian")] Mongolian, /// Hyphenation for _Norwegian._ (Code: `no`, Alias: `nb`, Alias: `nn`, Script, `Latn`, Feature: `norwegian`) #[cfg(feature = "norwegian")] Norwegian, + /// Hyphenation for _Oriya._ (Code: `or`, Script, `Orya`, Feature: `oriya`) + #[cfg(feature = "oriya")] + Oriya, + /// Hyphenation for _Panjabi._ (Code: `pa`, Script, `Guru`, Feature: `panjabi`) + #[cfg(feature = "panjabi")] + Panjabi, /// Hyphenation for _Polish._ (Code: `pl`, Script, `Latn`, Feature: `polish`) #[cfg(feature = "polish")] Polish, @@ -93,6 +120,9 @@ pub enum Lang { /// Hyphenation for _Russian._ (Code: `ru`, Script, `Cyrl`, Feature: `russian`) #[cfg(feature = "russian")] Russian, + /// Hyphenation for _Sanskrit._ (Code: `sa`, Script, `Deva`, Feature: `sanskrit`) + #[cfg(feature = "sanskrit")] + Sanskrit, /// Hyphenation for _Serbian._ (Code: `sr`, Script, `Cyrl`, Feature: `serbian`) #[cfg(feature = "serbian")] Serbian, @@ -108,6 +138,12 @@ pub enum Lang { /// Hyphenation for _Swedish._ (Code: `sv`, Script, `Latn`, Feature: `swedish`) #[cfg(feature = "swedish")] Swedish, + /// Hyphenation for _Tamil._ (Code: `ta`, Script, `Taml`, Feature: `tamil`) + #[cfg(feature = "tamil")] + Tamil, + /// Hyphenation for _Telugu._ (Code: `te`, Script, `Telu`, Feature: `telugu`) + #[cfg(feature = "telugu")] + Telugu, /// Hyphenation for _Turkish._ (Code: `tr`, Script, `Latn`, Feature: `turkish`) #[cfg(feature = "turkish")] Turkish, @@ -127,8 +163,12 @@ impl Lang { b"af" => Some(Self::Afrikaans), #[cfg(feature = "albanian")] b"sq" => Some(Self::Albanian), + #[cfg(feature = "assamese")] + b"as" => Some(Self::Assamese), #[cfg(feature = "belarusian")] b"be" => Some(Self::Belarusian), + #[cfg(feature = "bengali")] + b"bn" => Some(Self::Bengali), #[cfg(feature = "bulgarian")] b"bg" => Some(Self::Bulgarian), #[cfg(feature = "catalan")] @@ -157,18 +197,28 @@ impl Lang { b"de" => Some(Self::German), #[cfg(feature = "greek")] b"el" => Some(Self::Greek), + #[cfg(feature = "gujarati")] + b"gu" => Some(Self::Gujarati), + #[cfg(feature = "hindi")] + b"hi" => Some(Self::Hindi), #[cfg(feature = "hungarian")] b"hu" => Some(Self::Hungarian), #[cfg(feature = "icelandic")] b"is" => Some(Self::Icelandic), #[cfg(feature = "italian")] b"it" => Some(Self::Italian), + #[cfg(feature = "kannada")] + b"kn" => Some(Self::Kannada), #[cfg(feature = "kurmanji")] b"ku" => Some(Self::Kurmanji), #[cfg(feature = "latin")] b"la" => Some(Self::Latin), #[cfg(feature = "lithuanian")] b"lt" => Some(Self::Lithuanian), + #[cfg(feature = "malayalam")] + b"ml" => Some(Self::Malayalam), + #[cfg(feature = "marathi")] + b"mr" => Some(Self::Marathi), #[cfg(feature = "mongolian")] b"mn" => Some(Self::Mongolian), #[cfg(feature = "norwegian")] @@ -177,12 +227,18 @@ impl Lang { b"nb" => Some(Self::Norwegian), #[cfg(feature = "norwegian")] b"nn" => Some(Self::Norwegian), + #[cfg(feature = "oriya")] + b"or" => Some(Self::Oriya), + #[cfg(feature = "panjabi")] + b"pa" => Some(Self::Panjabi), #[cfg(feature = "polish")] b"pl" => Some(Self::Polish), #[cfg(feature = "portuguese")] b"pt" => Some(Self::Portuguese), #[cfg(feature = "russian")] b"ru" => Some(Self::Russian), + #[cfg(feature = "sanskrit")] + b"sa" => Some(Self::Sanskrit), #[cfg(feature = "serbian")] b"sr" => Some(Self::Serbian), #[cfg(feature = "slovak")] @@ -193,6 +249,10 @@ impl Lang { b"es" => Some(Self::Spanish), #[cfg(feature = "swedish")] b"sv" => Some(Self::Swedish), + #[cfg(feature = "tamil")] + b"ta" => Some(Self::Tamil), + #[cfg(feature = "telugu")] + b"te" => Some(Self::Telugu), #[cfg(feature = "turkish")] b"tr" => Some(Self::Turkish), #[cfg(feature = "turkmen")] @@ -213,8 +273,12 @@ impl Lang { Self::Afrikaans => (1, 2), #[cfg(feature = "albanian")] Self::Albanian => (2, 2), + #[cfg(feature = "assamese")] + Self::Assamese => (2, 2), #[cfg(feature = "belarusian")] Self::Belarusian => (2, 2), + #[cfg(feature = "bengali")] + Self::Bengali => (2, 2), #[cfg(feature = "bulgarian")] Self::Bulgarian => (2, 2), #[cfg(feature = "catalan")] @@ -243,28 +307,44 @@ impl Lang { Self::German => (2, 2), #[cfg(feature = "greek")] Self::Greek => (1, 1), + #[cfg(feature = "gujarati")] + Self::Gujarati => (2, 2), + #[cfg(feature = "hindi")] + Self::Hindi => (2, 2), #[cfg(feature = "hungarian")] Self::Hungarian => (2, 2), #[cfg(feature = "icelandic")] Self::Icelandic => (2, 2), #[cfg(feature = "italian")] Self::Italian => (2, 2), + #[cfg(feature = "kannada")] + Self::Kannada => (2, 2), #[cfg(feature = "kurmanji")] Self::Kurmanji => (2, 2), #[cfg(feature = "latin")] Self::Latin => (2, 2), #[cfg(feature = "lithuanian")] Self::Lithuanian => (2, 2), + #[cfg(feature = "malayalam")] + Self::Malayalam => (2, 2), + #[cfg(feature = "marathi")] + Self::Marathi => (2, 2), #[cfg(feature = "mongolian")] Self::Mongolian => (2, 2), #[cfg(feature = "norwegian")] Self::Norwegian => (2, 2), + #[cfg(feature = "oriya")] + Self::Oriya => (2, 2), + #[cfg(feature = "panjabi")] + Self::Panjabi => (2, 2), #[cfg(feature = "polish")] Self::Polish => (2, 2), #[cfg(feature = "portuguese")] Self::Portuguese => (2, 3), #[cfg(feature = "russian")] Self::Russian => (2, 2), + #[cfg(feature = "sanskrit")] + Self::Sanskrit => (2, 2), #[cfg(feature = "serbian")] Self::Serbian => (2, 2), #[cfg(feature = "slovak")] @@ -275,6 +355,10 @@ impl Lang { Self::Spanish => (2, 2), #[cfg(feature = "swedish")] Self::Swedish => (2, 2), + #[cfg(feature = "tamil")] + Self::Tamil => (2, 2), + #[cfg(feature = "telugu")] + Self::Telugu => (2, 2), #[cfg(feature = "turkish")] Self::Turkish => (2, 2), #[cfg(feature = "turkmen")] @@ -290,8 +374,12 @@ impl Lang { Self::Afrikaans => State::root(include_bytes!("../tries/af.bin")), #[cfg(feature = "albanian")] Self::Albanian => State::root(include_bytes!("../tries/sq.bin")), + #[cfg(feature = "assamese")] + Self::Assamese => State::root(include_bytes!("../tries/as.bin")), #[cfg(feature = "belarusian")] Self::Belarusian => State::root(include_bytes!("../tries/be.bin")), + #[cfg(feature = "bengali")] + Self::Bengali => State::root(include_bytes!("../tries/bn.bin")), #[cfg(feature = "bulgarian")] Self::Bulgarian => State::root(include_bytes!("../tries/bg.bin")), #[cfg(feature = "catalan")] @@ -320,28 +408,44 @@ impl Lang { Self::German => State::root(include_bytes!("../tries/de.bin")), #[cfg(feature = "greek")] Self::Greek => State::root(include_bytes!("../tries/el.bin")), + #[cfg(feature = "gujarati")] + Self::Gujarati => State::root(include_bytes!("../tries/gu.bin")), + #[cfg(feature = "hindi")] + Self::Hindi => State::root(include_bytes!("../tries/hi.bin")), #[cfg(feature = "hungarian")] Self::Hungarian => State::root(include_bytes!("../tries/hu.bin")), #[cfg(feature = "icelandic")] Self::Icelandic => State::root(include_bytes!("../tries/is.bin")), #[cfg(feature = "italian")] Self::Italian => State::root(include_bytes!("../tries/it.bin")), + #[cfg(feature = "kannada")] + Self::Kannada => State::root(include_bytes!("../tries/kn.bin")), #[cfg(feature = "kurmanji")] Self::Kurmanji => State::root(include_bytes!("../tries/ku.bin")), #[cfg(feature = "latin")] Self::Latin => State::root(include_bytes!("../tries/la.bin")), #[cfg(feature = "lithuanian")] Self::Lithuanian => State::root(include_bytes!("../tries/lt.bin")), + #[cfg(feature = "malayalam")] + Self::Malayalam => State::root(include_bytes!("../tries/ml.bin")), + #[cfg(feature = "marathi")] + Self::Marathi => State::root(include_bytes!("../tries/mr.bin")), #[cfg(feature = "mongolian")] Self::Mongolian => State::root(include_bytes!("../tries/mn.bin")), #[cfg(feature = "norwegian")] Self::Norwegian => State::root(include_bytes!("../tries/no.bin")), + #[cfg(feature = "oriya")] + Self::Oriya => State::root(include_bytes!("../tries/or.bin")), + #[cfg(feature = "panjabi")] + Self::Panjabi => State::root(include_bytes!("../tries/pa.bin")), #[cfg(feature = "polish")] Self::Polish => State::root(include_bytes!("../tries/pl.bin")), #[cfg(feature = "portuguese")] Self::Portuguese => State::root(include_bytes!("../tries/pt.bin")), #[cfg(feature = "russian")] Self::Russian => State::root(include_bytes!("../tries/ru.bin")), + #[cfg(feature = "sanskrit")] + Self::Sanskrit => State::root(include_bytes!("../tries/sa.bin")), #[cfg(feature = "serbian")] Self::Serbian => State::root(include_bytes!("../tries/sr.bin")), #[cfg(feature = "slovak")] @@ -352,6 +456,10 @@ impl Lang { Self::Spanish => State::root(include_bytes!("../tries/es.bin")), #[cfg(feature = "swedish")] Self::Swedish => State::root(include_bytes!("../tries/sv.bin")), + #[cfg(feature = "tamil")] + Self::Tamil => State::root(include_bytes!("../tries/ta.bin")), + #[cfg(feature = "telugu")] + Self::Telugu => State::root(include_bytes!("../tries/te.bin")), #[cfg(feature = "turkish")] Self::Turkish => State::root(include_bytes!("../tries/tr.bin")), #[cfg(feature = "turkmen")] diff --git a/src/lib.rs b/src/lib.rs index 89641b0..8c484c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -564,4 +564,18 @@ mod tests { test(Czech, "nej-ja-s-něj-ší"); test(Czech, "br-něn-ský"); } + + #[test] + #[cfg(feature = "malayalam")] + fn test_malayalam() { + test(Malayalam, "വി-ദ്യാർ-ത്ഥി"); + test(Malayalam, "പഠി-ക്കുക"); + } + + #[test] + #[cfg(feature = "hindi")] + fn test_hindi() { + test(Hindi, "वि-द्या-र्थी"); + test(Hindi, "पढ़-ना"); + } } diff --git a/tests/generate.rs b/tests/generate.rs index 4f702a4..c4f40fd 100644 --- a/tests/generate.rs +++ b/tests/generate.rs @@ -7,9 +7,11 @@ use std::path::Path; #[test] fn generate_code() { - let mut languages: [(&str, &str, &[&str], &str, &str, u8, u8); 36] = [ + let mut languages: [(&str, &str, &[&str], &str, &str, u8, u8); 48] = [ ("Afrikaans", "af", &[], "Latn", "hyph-af.tex", 1, 2), + ("Assamese", "as", &[], "Beng", "hyph-as.tex", 2, 2), ("Belarusian", "be", &[], "Cyrl", "hyph-be.tex", 2, 2), + ("Bengali", "bn", &[], "Beng", "hyph-bn.tex", 2, 2), ("Bulgarian", "bg", &[], "Cyrl", "hyph-bg.tex", 2, 2), ("Catalan", "ca", &[], "Latn", "hyph-ca.tex", 2, 2), ("Czech", "cs", &[], "Latn", "hyph-cs-sojka.tex", 2, 2), @@ -22,25 +24,35 @@ fn generate_code() { ("Estonian", "et", &[], "Latn", "hyph-et.tex", 2, 3), ("Finnish", "fi", &[], "Latn", "hyph-fi.tex", 2, 2), ("French", "fr", &[], "Latn", "hyph-fr.tex", 2, 2), + ("Gujarati", "gu", &[], "Gujr", "hyph-gu.tex", 2, 2), + ("Hindi", "hi", &[], "Deva", "hyph-hi.tex", 2, 2), ("Croatian", "hr", &[], "Latn", "hyph-hr.tex", 2, 2), ("Hungarian", "hu", &[], "Latn", "hyph-hu.tex", 2, 2), ("Icelandic", "is", &[], "Latn", "hyph-is.tex", 2, 2), ("Italian", "it", &[], "Latn", "hyph-it.tex", 2, 2), + ("Kannada", "kn", &[], "Knda", "hyph-kn.tex", 2, 2), ("Georgian", "ka", &[], "Geor", "hyph-ka.tex", 1, 2), ("Kurmanji", "ku", &[], "Latn", "hyph-kmr.tex", 2, 2), ("Latin", "la", &[], "Latn", "hyph-la.tex", 2, 2), ("Lithuanian", "lt", &[], "Latn", "hyph-lt.tex", 2, 2), + ("Malayalam", "ml", &[], "Mlym", "hyph-ml.tex", 2, 2), + ("Marathi", "mr", &[], "Deva", "hyph-mr.tex", 2, 2), ("Mongolian", "mn", &[], "Cyrl", "hyph-mn.tex", 2, 2), ("Dutch", "nl", &[], "Latn", "hyph-nl.tex", 2, 2), ("Norwegian", "no", &["nb", "nn"], "Latn", "hyph-no.tex", 2, 2), + ("Oriya", "or", &[], "Orya", "hyph-or.tex", 2, 2), + ("Panjabi", "pa", &[], "Guru", "hyph-pa.tex", 2, 2), ("Polish", "pl", &[], "Latn", "hyph-pl.tex", 2, 2), ("Portuguese", "pt", &[], "Latn", "hyph-pt.tex", 2, 3), ("Russian", "ru", &[], "Cyrl", "hyph-ru.tex", 2, 2), + ("Sanskrit", "sa", &[], "Deva", "hyph-sa.tex", 2, 2), ("Serbian", "sr", &[], "Cyrl", "hyph-sh-cyrl.tex", 2, 2), ("Slovak", "sk", &[], "Latn", "hyph-sk.tex", 2, 3), ("Slovenian", "sl", &[], "Latn", "hyph-sl.tex", 2, 2), ("Albanian", "sq", &[], "Latn", "hyph-sq.tex", 2, 2), ("Swedish", "sv", &[], "Latn", "hyph-sv.tex", 2, 2), + ("Tamil", "ta", &[], "Taml", "hyph-ta.tex", 2, 2), + ("Telugu", "te", &[], "Telu", "hyph-te.tex", 2, 2), ("Turkmen", "tk", &[], "Latn", "hyph-tk.tex", 2, 2), ("Turkish", "tr", &[], "Latn", "hyph-tr.tex", 2, 2), ("Ukrainian", "uk", &[], "Cyrl", "hyph-uk.tex", 2, 2), diff --git a/tries/as.bin b/tries/as.bin new file mode 100644 index 0000000..9311070 Binary files /dev/null and b/tries/as.bin differ diff --git a/tries/bn.bin b/tries/bn.bin new file mode 100644 index 0000000..9311070 Binary files /dev/null and b/tries/bn.bin differ diff --git a/tries/gu.bin b/tries/gu.bin new file mode 100644 index 0000000..3f8da88 Binary files /dev/null and b/tries/gu.bin differ diff --git a/tries/hi.bin b/tries/hi.bin new file mode 100644 index 0000000..5a02129 Binary files /dev/null and b/tries/hi.bin differ diff --git a/tries/kn.bin b/tries/kn.bin new file mode 100644 index 0000000..badfe6e Binary files /dev/null and b/tries/kn.bin differ diff --git a/tries/ml.bin b/tries/ml.bin new file mode 100644 index 0000000..add67ba Binary files /dev/null and b/tries/ml.bin differ diff --git a/tries/mr.bin b/tries/mr.bin new file mode 100644 index 0000000..5a02129 Binary files /dev/null and b/tries/mr.bin differ diff --git a/tries/or.bin b/tries/or.bin new file mode 100644 index 0000000..4039bc0 Binary files /dev/null and b/tries/or.bin differ diff --git a/tries/pa.bin b/tries/pa.bin new file mode 100644 index 0000000..4683bd7 Binary files /dev/null and b/tries/pa.bin differ diff --git a/tries/sa.bin b/tries/sa.bin new file mode 100644 index 0000000..d4b209c Binary files /dev/null and b/tries/sa.bin differ diff --git a/tries/ta.bin b/tries/ta.bin new file mode 100644 index 0000000..bd03d4a Binary files /dev/null and b/tries/ta.bin differ diff --git a/tries/te.bin b/tries/te.bin new file mode 100644 index 0000000..276b828 Binary files /dev/null and b/tries/te.bin differ