ソースを参照

Migrate Typographic Behavior to 3.x

dereuromark 8 年 前
コミット
e1509592e8

+ 1 - 1
docs/Behavior/Passwordable.md

@@ -184,7 +184,7 @@ $rules = ['validateCustom' => [
 		'last' => true,
 	]
 );
-$this->User->Behaviors->load('Tools.Passwordable', ['customValidation' => $rules]);
+$this->Users->Behaviors->load('Tools.Passwordable', ['customValidation' => $rules]);
 ```
 But please do NOT use the above regex examples. Also never try to limit the chars to only a subset of characters.
 Always allow [a-z], [0-9] and ALL special chars a user can possibly type in.

+ 2 - 2
docs/Behavior/Reset.md

@@ -56,11 +56,11 @@ Note that in this case we also use a timeout to avoid getting a penalty by Googl
 
 In this case we added a new cache field to our messages in order to make the search faster with >> 100000 records. The data was containing all the info we needed – in serialized format. We needed a callback here as there was some logic involved. So we simply made a shell containing both callback method and shell command:
 ```php
-$this->Message->addBehavior('Tools.Reset', [
+$this->Messages->addBehavior('Tools.Reset', [
     'fields' => ['data'], 'updateFields' => ['guest_name'],
     'scope' => ['data LIKE' => '{%'], 'callback' => 'UpdateShell::prepMessage'
     ]);
-$res = $this->Message->resetRecords();
+$res = $this->Messages->resetRecords();
 $this->out('Done: ' . $res);
 ```
 

+ 25 - 0
docs/Behavior/Typographic.md

@@ -0,0 +1,25 @@
+# Typographic Behavior
+
+A CakePHP behavior to handle typographic consistency.
+
+The basic idea is to normalize all input into a standard typography (utf8 default).
+So different quotes like `»` or `“` end up as `"` in the database.
+Upon output one can then decide to re-apply localization.
+
+### Usage
+
+#### Basic usage
+Include behavior in your Table class as
+```php
+$this->addBehavior('Tools.Typographic', [
+	'fields' => ['content'], 
+	'mergeQuotes' => false,
+]);
+```
+
+Set the `fields` to your table fields you want to normalize.
+
+### Configuration
+
+With the `mergeQuotes` option you can define whether both `"` and `'` should be merged into a single character.
+It defaults to `false`, as the two kinds of quotes might be nested in regular input.

+ 215 - 0
src/Model/Behavior/TypographicBehavior.php

@@ -0,0 +1,215 @@
+<?php
+/**
+ * @author Mark Scherer
+ * @license http://opensource.org/licenses/mit-license.php MIT
+ */
+
+namespace Tools\Model\Behavior;
+
+use ArrayAccess;
+use Cake\Event\Event;
+use ArrayObject;
+use Cake\Datasource\EntityInterface;
+use Cake\ORM\Behavior;
+
+/**
+ * Replace regionalized chars with standard ones on input.
+ *
+ * “smart quotes” become "dumb quotes" on save
+ * „low-high“ become "high-high"
+ * same for single quotes (apostrophes)
+ * in order to unify them. Basic idea is a unified non-regional version in the database.
+ *
+ * Using the TypographyHelper we can then format the output
+ * according to the language/regional setting (in some languages
+ * the high-high smart quotes, in others the low-high ones are preferred)
+ *
+ * Settings are:
+ * - string $before (marshal/save)
+ * - array $fields (leave empty for auto detection)
+ * - bool $mergeQuotes (merge single and double into " or any custom char)
+ *
+ * TODOS:
+ * - respect primary and secondary quotation marks as well as alternatives
+ *
+ * @link http://www.dereuromark.de/2012/08/12/typographic-behavior-and-typography-helper/
+ * @link http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks
+ */
+class TypographicBehavior extends Behavior {
+
+	const BEFORE_MARSHAL = 'marshal';
+	const BEFORE_SAVE = 'save';
+
+	/**
+	 * Replacement maps keyed by direction.
+	 *
+	 * `in` maps typographic quote characters to a plain `'` or `"`. Each entry is
+	 * followed by a comment naming the HTML entity of the character it replaces.
+	 * `out` is intentionally left empty here.
+	 *
+	 * @var array
+	 */
+	protected $_map = [
+		'in' => [
+			'‘' => '\'',
+			// Translates to '&lsquo;'.
+			'’' => '\'',
+			// Translates to '&rsquo;'.
+			'‚' => '\'',
+			// Translates to '&sbquo;'.
+			'‛' => '\'',
+			// Translates to '&#8219;'.
+			'“' => '"',
+			// Translates to '&ldquo;'.
+			'”' => '"',
+			// Translates to '&rdquo;'.
+			'„' => '"',
+			// Translates to '&bdquo;'.
+			'‟' => '"',
+			// Translates to '&#8223;'.
+			'«' => '"',
+			// Translates to '&laquo;'.
+			'»' => '"',
+			// Translates to '&raquo;'.
+			'‹' => '\'',
+			// Translates to '&lsaquo;'.
+			'›' => '\'',
+			// Translates to '&rsaquo;'.
+		],
+		'out' => [
+			// Use the TypographyHelper for this at runtime.
+		],
+	];
+
+	/**
+	 * NOTE(review): not read or written by any method of this class as shown here;
+	 * possibly a leftover - confirm before removing.
+	 *
+	 * @var int|null
+	 */
+	protected $_id;
+
+	/**
+	 * Default configuration.
+	 *
+	 * - `before`: callback in which the fields get processed, `save` or `marshal`.
+	 * - `fields`: fields to process; when empty they are detected from the table schema on initialize.
+	 * - `mergeQuotes`: `false` keeps single and double quotes apart, `true` is turned into `"`
+	 *   on initialize, and an explicit character replaces every mapped quote.
+	 *
+	 * @var array
+	 */
+	protected $_defaultConfig = [
+		'before' => self::BEFORE_SAVE, // save or marshal
+		'fields' => [],
+		'mergeQuotes' => false, // Set to true for " or explicitly set a char (" or ').
+	];
+
+	/**
+	 * Initialize the behavior.
+	 *
+	 * When no `fields` are configured, all `string` and `text` columns of the table
+	 * schema are used, skipping columns that carry a non-empty `key` entry and columns
+	 * of length 1. A `mergeQuotes` value of `true` is normalized to the double quote.
+	 *
+	 * @param array $config Settings to override for model.
+	 * @return void
+	 */
+	public function initialize(array $config = []) {
+		if (empty($this->_config['fields'])) {
+			$schema = $this->getTable()->schema();
+
+			// Start from an empty list so a table without any matching column
+			// does not leave $fields undefined.
+			$fields = [];
+			foreach ($schema->columns() as $field) {
+				$v = $schema->column($field);
+				if (!in_array($v['type'], ['string', 'text'], true)) {
+					continue;
+				}
+				// NOTE(review): confirm the schema actually exposes a `key` entry per column.
+				if (!empty($v['key'])) {
+					continue;
+				}
+				if (isset($v['length']) && $v['length'] === 1) { // TODO: also skip UUID (length 36)?
+					continue;
+				}
+				$fields[] = $field;
+			}
+			$this->_config['fields'] = $fields;
+		}
+		if ($this->_config['mergeQuotes'] === true) {
+			$this->_config['mergeQuotes'] = '"';
+		}
+	}
+
+	/**
+	 * Normalizes the configured fields of the incoming data when `before` is set to marshal.
+	 *
+	 * @param \Cake\Event\Event $event
+	 * @param \ArrayObject $data
+	 * @param \ArrayObject $options
+	 * @return bool Always true.
+	 */
+	public function beforeMarshal(Event $event, ArrayObject $data, ArrayObject $options) {
+		// Compare against the class constant instead of repeating the literal.
+		if ($this->_config['before'] === self::BEFORE_MARSHAL) {
+			$this->process($data);
+		}
+
+		return true;
+	}
+
+	/**
+	 * Normalizes the configured fields of the entity when `before` is set to save.
+	 *
+	 * @param \Cake\Event\Event $event
+	 * @param \Cake\Datasource\EntityInterface $entity
+	 * @param \ArrayObject $options
+	 * @return bool Always true.
+	 */
+	public function beforeSave(Event $event, EntityInterface $entity, ArrayObject $options) {
+		// Compare against the class constant instead of repeating the literal.
+		if ($this->_config['before'] === self::BEFORE_SAVE) {
+			$this->process($entity);
+		}
+
+		return true;
+	}
+
+	/**
+	 * Run the behavior over all records of this model.
+	 * This is useful if you attach it after some records have already been saved without it.
+	 *
+	 * Records are fetched in pages of 100, ordered by primary key. With `$dryRun`
+	 * enabled nothing is saved, but records that would change are still counted.
+	 *
+	 * @param bool $dryRun
+	 * @return int count Number of affected/changed records
+	 */
+	public function updateTypography($dryRun = false) {
+		$table = $this->getTable();
+		$limit = 100;
+
+		// Page through the table in a stable order: without an ORDER BY the database may
+		// return rows in a different order per query (especially after updates), so
+		// offset paging could skip or repeat records.
+		$order = [];
+		foreach ((array)$table->getPrimaryKey() as $column) {
+			$order[$table->aliasField($column)] = 'ASC';
+		}
+		$options = ['limit' => $limit, 'offset' => 0, 'order' => $order];
+
+		$count = 0;
+		while ($records = $table->find('all', $options)->toArray()) {
+			foreach ($records as $record) {
+				$changed = false;
+				foreach ($this->_config['fields'] as $field) {
+					if (empty($record[$field])) {
+						continue;
+					}
+					$tmp = $this->_prepareInput($record[$field]);
+					// Loose comparison kept on purpose: str_replace() stringifies non-string
+					// values (e.g. integers), which must not count as a change.
+					if ($tmp == $record[$field]) {
+						continue;
+					}
+					$record[$field] = $tmp;
+					$changed = true;
+				}
+				if ($changed) {
+					if (!$dryRun) {
+						// NOTE(review): verify `validate` is still an honored save option in 3.x.
+						$table->save($record, ['validate' => false]);
+					}
+					$count++;
+				}
+			}
+			$options['offset'] += $limit;
+		}
+		return $count;
+	}
+
+	/**
+	 * Normalizes every configured field that is present and non-empty in the given data.
+	 *
+	 * The data object is modified in place.
+	 *
+	 * @param \ArrayAccess $data
+	 * @return void
+	 */
+	public function process(ArrayAccess $data) {
+		foreach ($this->_config['fields'] as $name) {
+			if (empty($data[$name])) {
+				continue;
+			}
+			$data[$name] = $this->_prepareInput($data[$name]);
+		}
+	}
+
+	/**
+	 * Replaces all characters of the `in` map within the given input.
+	 *
+	 * With a truthy `mergeQuotes` config every mapped character is replaced by that
+	 * value, otherwise by its individual replacement from the map.
+	 *
+	 * @param string $string
+	 * @return string cleanedInput
+	 */
+	protected function _prepareInput($string) {
+		$search = array_keys($this->_map['in']);
+		$merge = $this->_config['mergeQuotes'];
+		if ($merge) {
+			$replace = array_fill(0, count($search), $merge);
+		} else {
+			$replace = array_values($this->_map['in']);
+		}
+
+		return str_replace($search, $replace, $string);
+	}
+
+}

+ 2 - 3
src/Model/Table/TokensTable.php

@@ -3,7 +3,6 @@
 namespace Tools\Model\Table;
 
 use Cake\Utility\Hash;
-use Tools\Model\Table\Table;
 use Tools\Utility\Random;
 
 /**
@@ -212,8 +211,8 @@ class TokensTable extends Table {
 		$keys['unused_invalid'] = $this->find('count', ['conditions' => [$this->alias() . '.used' => 0, $this->alias() . '.created <' => date(FORMAT_DB_DATETIME, time() - $this->validity)]]);
 		$keys['used_invalid'] = $this->find('count', ['conditions' => [$this->alias() . '.used' => 1, $this->alias() . '.created <' => date(FORMAT_DB_DATETIME, time() - $this->validity)]]);
 
-		$types = $this->find('all', ['conditions' => [], 'fields' => ['DISTINCT type']]);
-		$keys['types'] = !empty($types) ? Hash::extract('{n}.type', $types) : [];
+		$types = $this->find('all', ['conditions' => [], 'fields' => ['DISTINCT type']])->toArray();
+		$keys['types'] = !empty($types) ? Hash::extract($types, '{n}.type') : [];
 		return $keys;
 	}
 

+ 138 - 0
tests/TestCase/Model/Behavior/TypographicBehaviorTest.php

@@ -0,0 +1,138 @@
+<?php
+namespace Tools\Test\TestCase\Model\Behavior;
+
+use Cake\ORM\TableRegistry;
+use Tools\Model\Behavior\TypographicBehavior;
+use Tools\TestSuite\TestCase;
+
+class TypographicBehaviorTest extends TestCase {
+
+	/**
+	 * @var \Cake\ORM\Table
+	 */
+	public $Model;
+
+	/**
+	 * @var array
+	 */
+	public $fixtures = ['core.articles'];
+
+	/**
+	 * Loads the Articles table and attaches the behavior for the `body` field in marshal mode.
+	 *
+	 * @return void
+	 */
+	public function setUp() {
+		parent::setUp();
+
+		$config = ['fields' => ['body'], 'before' => 'marshal'];
+		$this->Model = TableRegistry::get('Articles');
+		$this->Model->addBehavior('Tools.Typographic', $config);
+	}
+
+	/**
+	 * The attached behavior is available on the table's behavior registry.
+	 *
+	 * @return void
+	 */
+	public function testObject() {
+		$behavior = $this->Model->behaviors()->Typographic;
+		$this->assertInstanceOf(TypographicBehavior::class, $behavior);
+	}
+
+	/**
+	 * Plain quotes stay untouched, typographic quotes in `body` get replaced on marshal,
+	 * and the unconfigured `title` field keeps its original value.
+	 *
+	 * @return void
+	 */
+	public function testBeforeMarshal() {
+		$input = [
+			'title' => 'some «cool» title',
+			'body' => 'A title with normal "qotes" - should be left untouched',
+		];
+		$article = $this->Model->newEntity($input);
+		$this->assertEmpty($article->getErrors());
+		$this->assertSame($input, $article->toArray());
+
+		$cases = [
+			'some string with ‹single angle quotes›' => 'some string with \'single angle quotes\'',
+			'other string with „German‟ quotes' => 'other string with "German" quotes',
+			'mixed single ‚one‛ and ‘two’.' => 'mixed single \'one\' and \'two\'.',
+			'mixed double “one” and «two».' => 'mixed double "one" and "two".',
+		];
+		foreach ($cases as $raw => $normalized) {
+			$input = [
+				'title' => 'some «cool» title',
+				'body' => $raw,
+			];
+			$article = $this->Model->newEntity($input);
+			$this->assertEmpty($article->getErrors());
+
+			$array = $article->toArray();
+			$this->assertSame($input['title'], $array['title']);
+			$this->assertSame($normalized, $array['body']);
+		}
+	}
+
+	/**
+	 * With `mergeQuotes` enabled and no explicit fields, all typographic quotes in
+	 * `title` and `body` end up as double quotes on marshal.
+	 *
+	 * @return void
+	 */
+	public function testMergeQuotes() {
+		$this->Model->removeBehavior('Typographic');
+		$this->Model->addBehavior('Tools.Typographic', ['before' => 'marshal', 'mergeQuotes' => true]);
+
+		$cases = [
+			'some string with ‹single angle quotes›' => 'some string with "single angle quotes"',
+			'other string with „German‟ quotes' => 'other string with "German" quotes',
+			'mixed single ‚one‛ and ‘two’.' => 'mixed single "one" and "two".',
+			'mixed double “one” and «two».' => 'mixed double "one" and "two".',
+		];
+		foreach ($cases as $raw => $normalized) {
+			$article = $this->Model->newEntity([
+				'title' => 'some «cool» title',
+				'body' => $raw,
+			]);
+			$this->assertEmpty($article->getErrors());
+
+			$array = $article->toArray();
+			$this->assertSame('some "cool" title', $array['title']);
+			$this->assertSame($normalized, $array['body']);
+		}
+	}
+
+	/**
+	 * Test that not defining fields results in both `title` and `body` being processed on save.
+	 *
+	 * @return void
+	 */
+	public function testAutoFields() {
+		$this->Model->removeBehavior('Typographic');
+		$this->Model->addBehavior('Tools.Typographic');
+
+		$article = $this->Model->newEntity([
+			'title' => '„German‟ quotes',
+			'body' => 'mixed double “one” and «two»',
+		]);
+		$this->assertEmpty($article->getErrors());
+
+		$saved = $this->Model->save($article);
+		$this->assertTrue((bool)$saved);
+
+		$this->assertSame('"German" quotes', $saved['title']);
+		$this->assertSame('mixed double "one" and "two"', $saved['body']);
+	}
+
+	/**
+	 * Records saved without the behavior get normalized by updateTypography() once it is
+	 * attached; 202 records are created so more than two pages of 100 are processed.
+	 *
+	 * @return void
+	 */
+	public function testUpdateTypography() {
+		$this->Model->removeBehavior('Typographic');
+		for ($i = 0; $i < 202; $i++) {
+			$data = [
+				'title' => 'title ' . $i,
+				'body' => 'unclean `content` to «correct»',
+			];
+			$entity = $this->Model->newEntity($data);
+			$result = $this->Model->save($entity);
+			$this->assertTrue((bool)$result);
+		}
+		$this->Model->addBehavior('Tools.Typographic');
+		$count = $this->Model->updateTypography();
+		// Dedicated assertion so a failure reports the actual count.
+		$this->assertGreaterThanOrEqual(200, $count);
+
+		$record = $this->Model->find()->orderDesc('id')->firstOrFail();
+		$this->assertSame('unclean `content` to "correct"', $record['body']);
+	}
+
+}