PseudoLocalizationTranslator.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Translation;
  11. use Symfony\Component\Translation\Exception\LogicException;
  12. use Symfony\Contracts\Translation\TranslatorInterface;
  /**
   * Decorates another translator and pseudo-localizes every message it returns:
   * characters can be replaced with accented look-alikes, the string can be
   * padded to simulate longer translations, and the result can be wrapped in
   * brackets.
   *
   * This translator should only be used in a development environment.
   */
  final class PseudoLocalizationTranslator implements TranslatorInterface, TranslatorBagInterface
  {
      private const EXPANSION_CHARACTER = '~';

      /**
       * Elements that never take an end tag in HTML. Emitting one is at best a
       * parse error and, for "br", is interpreted by browsers as a second line break.
       */
      private const VOID_HTML_ELEMENTS = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'];

      private bool $accents;
      private float $expansionFactor;
      private bool $brackets;
      private bool $parseHTML;

      /**
       * @var string[]
       */
      private array $localizableHTMLAttributes;

      /**
       * Available options:
       *  * accents:
       *      type: boolean
       *      default: true
       *      description: replace ASCII characters of the translated string with accented versions or similar characters
       *      example: if true, "foo" => "ƒöö".
       *
       *  * expansion_factor:
       *      type: float
       *      default: 1
       *      validation: it must be greater than or equal to 1
       *      description: expand the translated string by the given factor with spaces and tildes
       *      example: if 2 (and with the "brackets" option set to false), "foo" => "foo ~~"
       *
       *  * brackets:
       *      type: boolean
       *      default: true
       *      description: wrap the translated string with brackets
       *      example: if true, "foo" => "[foo]"
       *
       *  * parse_html:
       *      type: boolean
       *      default: false
       *      description: parse the translated string as HTML - looking for HTML tags has a performance impact but allows to preserve them from alterations - it also allows to compute the visible translated string length which is useful to correctly expand it when it contains HTML
       *      warning: unclosed tags are unsupported, they will be fixed (closed) by the parser - eg, "foo <div>bar" => "foo <div>bar</div>"
       *
       *  * localizable_html_attributes:
       *      type: string[]
       *      default: []
       *      description: the list of HTML attributes whose values can be altered - it is only useful when the "parse_html" option is set to true
       *      example: if ["title"], and with the "accents" option set to true, "<a href="#" title="Go to your profile">Profile</a>" => "<a href="#" title="Ĝö ţö ýöûŕ þŕöƒîļé">Þŕöƒîļé</a>" - if "title" was not in the "localizable_html_attributes" list, the title attribute data would be left unchanged.
       *
       * @param TranslatorInterface $translator the wrapped translator that produces the real messages
       * @param array               $options    see the list above; missing keys fall back to their defaults
       *
       * @throws \InvalidArgumentException when "expansion_factor" is lower than 1
       */
      public function __construct(
          private TranslatorInterface $translator,
          array $options = [],
      ) {
          // $translator is a promoted constructor property: no explicit assignment is needed.
          $this->accents = $options['accents'] ?? true;

          if (1.0 > ($this->expansionFactor = $options['expansion_factor'] ?? 1.0)) {
              throw new \InvalidArgumentException('The expansion factor must be greater than or equal to 1.');
          }

          $this->brackets = $options['brackets'] ?? true;

          $this->parseHTML = $options['parse_html'] ?? false;
          if ($this->parseHTML && !$this->accents && 1.0 === $this->expansionFactor) {
              // Neither accents nor expansion are active, so there is nothing to
              // protect the tags from and no visible length to compute: skip parsing.
              $this->parseHTML = false;
          }

          $this->localizableHTMLAttributes = $options['localizable_html_attributes'] ?? [];
      }

      /**
       * Translates the message with the wrapped translator, then applies accents,
       * expansion and brackets according to the configured options.
       */
      public function trans(string $id, array $parameters = [], ?string $domain = null, ?string $locale = null): string
      {
          $trans = '';
          $visibleText = '';

          foreach ($this->getParts($this->translator->trans($id, $parameters, $domain, $locale)) as [$visible, $localizable, $text]) {
              if ($visible) {
                  // Only visible text counts towards the length used by expand().
                  $visibleText .= $text;
              }

              if (!$localizable) {
                  $trans .= $text;

                  continue;
              }

              $this->addAccents($trans, $text);
          }

          $this->expand($trans, $visibleText);

          $this->addBrackets($trans);

          return $trans;
      }

      /**
       * Returns the locale of the wrapped translator.
       */
      public function getLocale(): string
      {
          return $this->translator->getLocale();
      }

      /**
       * Delegates to the wrapped translator.
       *
       * @throws LogicException when the wrapped translator does not implement TranslatorBagInterface
       */
      public function getCatalogue(?string $locale = null): MessageCatalogueInterface
      {
          return $this->getTranslatorBag(__METHOD__)->getCatalogue($locale);
      }

      /**
       * Delegates to the wrapped translator.
       *
       * @throws LogicException when the wrapped translator does not implement TranslatorBagInterface
       */
      public function getCatalogues(): array
      {
          return $this->getTranslatorBag(__METHOD__)->getCatalogues();
      }

      /**
       * Returns the wrapped translator once it is known to be a translator bag.
       *
       * @param string $method name of the calling method, used in the error message
       *
       * @throws LogicException when the wrapped translator does not implement TranslatorBagInterface
       */
      private function getTranslatorBag(string $method): TranslatorBagInterface
      {
          if (!$this->translator instanceof TranslatorBagInterface) {
              throw new LogicException(\sprintf('The "%s()" method cannot be called as the wrapped translator class "%s" does not implement the "%s".', $method, $this->translator::class, TranslatorBagInterface::class));
          }

          return $this->translator;
      }

      /**
       * Splits a translated message into parts.
       *
       * Each part is a [visible, localizable, text] triple: "visible" parts count
       * towards the expansion length, "localizable" parts receive accents, and the
       * others are copied to the output untouched.
       *
       * @return array<int, array{0: bool, 1: bool, 2: string}>
       */
      private function getParts(string $originalTrans): array
      {
          if (!$this->parseHTML) {
              return [[true, true, $originalTrans]];
          }

          // Turn every code point >= 0x80 into a numeric entity so that the markup handed to libxml is plain ASCII.
          $html = mb_encode_numericentity($originalTrans, [0x80, 0x10FFFF, 0, 0x1FFFFF], mb_detect_encoding($originalTrans, null, true) ?: 'UTF-8');

          $useInternalErrors = libxml_use_internal_errors(true);

          try {
              $dom = new \DOMDocument();
              $dom->loadHTML('<trans>'.$html.'</trans>');
          } finally {
              // Always restore the caller's libxml error handling, even if loading throws.
              libxml_clear_errors();
              libxml_use_internal_errors($useInternalErrors);
          }

          // The path below is expected to walk <html> -> <body> -> <trans> (the wrapper added above).
          return $this->parseNode($dom->childNodes->item(1)->childNodes->item(0)->childNodes->item(0));
      }

      /**
       * Recursively flattens the children of a DOM node into [visible, localizable, text] parts.
       *
       * Non-element children are visible and localizable. Tag markup and attribute
       * names are neither; attribute values are localizable only when the attribute
       * is listed in the "localizable_html_attributes" option.
       *
       * @return array<int, array{0: bool, 1: bool, 2: string}>
       */
      private function parseNode(\DOMNode $node): array
      {
          $parts = [];

          foreach ($node->childNodes as $childNode) {
              if (!$childNode instanceof \DOMElement) {
                  $parts[] = [true, true, $childNode->nodeValue];

                  continue;
              }

              $parts[] = [false, false, '<'.$childNode->tagName];

              /** @var \DOMAttr $attribute */
              foreach ($childNode->attributes as $attribute) {
                  $parts[] = [false, false, ' '.$attribute->nodeName.'="'];

                  $localizableAttribute = \in_array($attribute->nodeName, $this->localizableHTMLAttributes, true);
                  // With PREG_SPLIT_DELIM_CAPTURE the escaped entities land on odd indexes:
                  // they are kept as-is, only the chunks on even indexes may be localized.
                  foreach (preg_split('/(&(?:amp|quot|#039|lt|gt);+)/', htmlspecialchars($attribute->nodeValue, \ENT_QUOTES, 'UTF-8'), -1, \PREG_SPLIT_DELIM_CAPTURE) as $i => $match) {
                      if ('' === $match) {
                          continue;
                      }

                      $parts[] = [false, $localizableAttribute && 0 === $i % 2, $match];
                  }

                  $parts[] = [false, false, '"'];
              }

              $parts[] = [false, false, '>'];

              // Children are still visited for void elements so that nothing is lost
              // if the parser attached content to an element it does not know as empty.
              $parts = array_merge($parts, $this->parseNode($childNode));

              if (!\in_array(strtolower($childNode->tagName), self::VOID_HTML_ELEMENTS, true)) {
                  $parts[] = [false, false, '</'.$childNode->tagName.'>'];
              }
          }

          return $parts;
      }

      /**
       * Appends $text to $trans, replacing printable ASCII characters with accented
       * or look-alike characters when the "accents" option is enabled.
       */
      private function addAccents(string &$trans, string $text): void
      {
          $trans .= $this->accents ? strtr($text, [
              ' ' => ' ',
              '!' => '¡',
              '"' => '″',
              '#' => '♯',
              '$' => '€',
              '%' => '‰',
              '&' => '⅋',
              '\'' => '´',
              '(' => '{',
              ')' => '}',
              '*' => '⁎',
              '+' => '⁺',
              ',' => '،',
              '-' => '‐',
              '.' => '·',
              '/' => '⁄',
              '0' => '⓪',
              '1' => '①',
              '2' => '②',
              '3' => '③',
              '4' => '④',
              '5' => '⑤',
              '6' => '⑥',
              '7' => '⑦',
              '8' => '⑧',
              '9' => '⑨',
              ':' => '∶',
              ';' => '⁏',
              '<' => '≤',
              '=' => '≂',
              '>' => '≥',
              '?' => '¿',
              '@' => '՞',
              'A' => 'Å',
              'B' => 'Ɓ',
              'C' => 'Ç',
              'D' => 'Ð',
              'E' => 'É',
              'F' => 'Ƒ',
              'G' => 'Ĝ',
              'H' => 'Ĥ',
              'I' => 'Î',
              'J' => 'Ĵ',
              'K' => 'Ķ',
              'L' => 'Ļ',
              'M' => 'Ṁ',
              'N' => 'Ñ',
              'O' => 'Ö',
              'P' => 'Þ',
              'Q' => 'Ǫ',
              'R' => 'Ŕ',
              'S' => 'Š',
              'T' => 'Ţ',
              'U' => 'Û',
              'V' => 'Ṽ',
              'W' => 'Ŵ',
              'X' => 'Ẋ',
              'Y' => 'Ý',
              'Z' => 'Ž',
              '[' => '⁅',
              '\\' => '∖',
              ']' => '⁆',
              '^' => '˄',
              '_' => '‿',
              '`' => '‵',
              'a' => 'å',
              'b' => 'ƀ',
              'c' => 'ç',
              'd' => 'ð',
              'e' => 'é',
              'f' => 'ƒ',
              'g' => 'ĝ',
              'h' => 'ĥ',
              'i' => 'î',
              'j' => 'ĵ',
              'k' => 'ķ',
              'l' => 'ļ',
              'm' => 'ɱ',
              'n' => 'ñ',
              'o' => 'ö',
              'p' => 'þ',
              'q' => 'ǫ',
              'r' => 'ŕ',
              's' => 'š',
              't' => 'ţ',
              'u' => 'û',
              'v' => 'ṽ',
              'w' => 'ŵ',
              'x' => 'ẋ',
              'y' => 'ý',
              'z' => 'ž',
              '{' => '(',
              '|' => '¦',
              '}' => ')',
              '~' => '˞',
          ]) : $text;
      }

      /**
       * Pads $trans with space-separated runs of the expansion character until the
       * visible length has grown by the configured factor.
       *
       * The runs mimic words: their lengths are drawn at random from the lengths of
       * the words found in $visibleText, weighted by how often each length occurs.
       */
      private function expand(string &$trans, string $visibleText): void
      {
          if (1.0 >= $this->expansionFactor) {
              return;
          }

          $visibleLength = $this->strlen($visibleText);
          $missingLength = (int) ceil($visibleLength * $this->expansionFactor) - $visibleLength;
          if ($this->brackets) {
              // The two brackets added afterwards count as part of the expansion.
              $missingLength -= 2;
          }

          if (0 >= $missingLength) {
              return;
          }

          // Histogram "word length => occurrences", limited to lengths that still
          // fit once the separating space is accounted for.
          $words = [];
          $wordsCount = 0;
          foreach (preg_split('/ +/', $visibleText, -1, \PREG_SPLIT_NO_EMPTY) as $word) {
              $wordLength = $this->strlen($word);

              if ($wordLength >= $missingLength) {
                  continue;
              }

              $words[$wordLength] = ($words[$wordLength] ?? 0) + 1;
              ++$wordsCount;
          }

          if (!$words) {
              $this->appendFiller($trans, $missingLength);

              return;
          }

          arsort($words, \SORT_NUMERIC);

          $longestWordLength = max(array_keys($words));

          while (true) {
              // Weighted random pick: walk the histogram until the drawn rank is consumed.
              $r = mt_rand(1, $wordsCount);

              foreach ($words as $length => $count) {
                  $r -= $count;
                  if ($r <= 0) {
                      break;
                  }
              }

              $trans .= ' '.str_repeat(self::EXPANSION_CHARACTER, $length);

              $missingLength -= $length + 1;

              if (0 === $missingLength) {
                  return;
              }

              // Drop the lengths that no longer fit in what remains to be filled.
              while ($longestWordLength >= $missingLength) {
                  $wordsCount -= $words[$longestWordLength];
                  unset($words[$longestWordLength]);

                  if (!$words) {
                      $this->appendFiller($trans, $missingLength);

                      return;
                  }

                  $longestWordLength = max(array_keys($words));
              }
          }
      }

      /**
       * Appends exactly $missingLength characters to $trans: a single expansion
       * character, or a space followed by expansion characters when more than one
       * character is needed.
       */
      private function appendFiller(string &$trans, int $missingLength): void
      {
          $trans .= 1 === $missingLength ? self::EXPANSION_CHARACTER : ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);
      }

      /**
       * Wraps $trans in square brackets when the "brackets" option is enabled.
       */
      private function addBrackets(string &$trans): void
      {
          if (!$this->brackets) {
              return;
          }

          $trans = '['.$trans.']';
      }

      /**
       * Returns the length of $s in characters, falling back to the byte length
       * when its encoding cannot be detected.
       */
      private function strlen(string $s): int
      {
          return false === ($encoding = mb_detect_encoding($s, null, true)) ? \strlen($s) : mb_strlen($s, $encoding);
      }
  }