dql-parser.rst 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. DQL Lexer
  2. =========
  3. Here is a more complicated example from the Doctrine ORM project.
  4. The ``Doctrine\ORM\Query\Lexer`` implementation for DQL looks something
  5. like the following:
  6. .. code-block:: php
  7. use Doctrine\Common\Lexer\AbstractLexer;
  /**
   * Scanner for DQL query strings.
   *
   * Declares one integer constant per token type and classifies each lexeme
   * found by the parent lexer into one of those types.
   */
  class Lexer extends AbstractLexer
  {
      /*
       * Literals and symbols. None of these can be used as an identifier,
       * so all of their values stay below 100.
       */
      public const T_NONE = 1;
      public const T_INTEGER = 2;
      public const T_STRING = 3;
      public const T_INPUT_PARAMETER = 4;
      public const T_FLOAT = 5;
      public const T_CLOSE_PARENTHESIS = 6;
      public const T_OPEN_PARENTHESIS = 7;
      public const T_COMMA = 8;
      public const T_DIVIDE = 9;
      public const T_DOT = 10;
      public const T_EQUALS = 11;
      public const T_GREATER_THAN = 12;
      public const T_LOWER_THAN = 13;
      public const T_MINUS = 14;
      public const T_MULTIPLY = 15;
      public const T_NEGATE = 16;
      public const T_PLUS = 17;
      public const T_OPEN_CURLY_BRACE = 18;
      public const T_CLOSE_CURLY_BRACE = 19;

      /*
       * Identifiers, and keywords that may be treated as identifiers,
       * start at 100.
       */
      public const T_ALIASED_NAME = 100;
      public const T_FULLY_QUALIFIED_NAME = 101;
      public const T_IDENTIFIER = 102;

      /*
       * Keywords start at 200.
       */
      public const T_ALL = 200;
      public const T_AND = 201;
      public const T_ANY = 202;
      public const T_AS = 203;
      public const T_ASC = 204;
      public const T_AVG = 205;
      public const T_BETWEEN = 206;
      public const T_BOTH = 207;
      public const T_BY = 208;
      public const T_CASE = 209;
      public const T_COALESCE = 210;
      public const T_COUNT = 211;
      public const T_DELETE = 212;
      public const T_DESC = 213;
      public const T_DISTINCT = 214;
      public const T_ELSE = 215;
      public const T_EMPTY = 216;
      public const T_END = 217;
      public const T_ESCAPE = 218;
      public const T_EXISTS = 219;
      public const T_FALSE = 220;
      public const T_FROM = 221;
      public const T_GROUP = 222;
      public const T_HAVING = 223;
      public const T_HIDDEN = 224;
      public const T_IN = 225;
      public const T_INDEX = 226;
      public const T_INNER = 227;
      public const T_INSTANCE = 228;
      public const T_IS = 229;
      public const T_JOIN = 230;
      public const T_LEADING = 231;
      public const T_LEFT = 232;
      public const T_LIKE = 233;
      public const T_MAX = 234;
      public const T_MEMBER = 235;
      public const T_MIN = 236;
      public const T_NEW = 237;
      public const T_NOT = 238;
      public const T_NULL = 239;
      public const T_NULLIF = 240;
      public const T_OF = 241;
      public const T_OR = 242;
      public const T_ORDER = 243;
      public const T_OUTER = 244;
      public const T_PARTIAL = 245;
      public const T_SELECT = 246;
      public const T_SET = 247;
      public const T_SOME = 248;
      public const T_SUM = 249;
      public const T_THEN = 250;
      public const T_TRAILING = 251;
      public const T_TRUE = 252;
      public const T_UPDATE = 253;
      public const T_WHEN = 254;
      public const T_WHERE = 255;
      public const T_WITH = 256;

      /**
       * Builds a scanner and hands it the query to tokenize.
       *
       * @param string $input A query string.
       */
      public function __construct($input)
      {
          $this->setInput($input);
      }

      /**
       * {@inheritdoc}
       *
       * Returns the regular-expression fragments for the lexemes of interest,
       * in this order: aliased names, identifiers / qualified names, numbers,
       * quoted strings and input parameters.
       */
      protected function getCatchablePatterns()
      {
          $aliasedName = '[a-z_][a-z0-9_]*\:[a-z_][a-z0-9_]*(?:\\\[a-z_][a-z0-9_]*)*';
          $identifierOrQualifiedName = '[a-z_\\\][a-z0-9_]*(?:\\\[a-z_][a-z0-9_]*)*';
          $number = '(?:[0-9]+(?:[\.][0-9]+)*)(?:e[+-]?[0-9]+)?';
          $quotedString = "'(?:[^']|'')*'";
          $parameter = '\?[0-9]*|:[a-z_][a-z0-9_]*';

          return [
              $aliasedName,
              $identifierOrQualifiedName,
              $number,
              $quotedString,
              $parameter,
          ];
      }

      /**
       * {@inheritdoc}
       *
       * Returns the fragments for whitespace runs and for any single character.
       */
      protected function getNonCatchablePatterns()
      {
          $whitespace = '\s+';
          $anySingleCharacter = '(.)';

          return [$whitespace, $anySingleCharacter];
      }

      /**
       * {@inheritdoc}
       *
       * Classifies one lexeme. The checks run in a fixed order: numbers,
       * quoted strings, names and keywords, input parameters, then single
       * symbols. Anything unrecognized is reported as T_NONE.
       *
       * @param string $value The lexeme; taken by reference because quoted
       *                      strings are replaced by their unquoted content.
       *
       * @return int One of the T_* constants.
       */
      protected function getType(&$value)
      {
          // Numbers: a decimal point or an exponent marker makes it a float.
          if (is_numeric($value)) {
              $hasFloatMarker = strpos($value, '.') !== false || stripos($value, 'e') !== false;

              return $hasFloatMarker ? self::T_FLOAT : self::T_INTEGER;
          }

          // Quoted strings: drop the surrounding quotes and collapse each
          // doubled quote into a single one.
          if ($value[0] === "'") {
              $unquoted = substr($value, 1, strlen($value) - 2);
              $value = str_replace("''", "'", $unquoted);

              return self::T_STRING;
          }

          // Keywords, identifiers, aliased names and qualified names.
          if (ctype_alpha($value[0]) || $value[0] === '_' || $value[0] === '\\') {
              $constantName = 'Doctrine\ORM\Query\Lexer::T_' . strtoupper($value);

              // A matching T_* constant above 100 wins over the generic name types.
              if (defined($constantName)) {
                  $declaredType = constant($constantName);

                  if ($declaredType > 100) {
                      return $declaredType;
                  }
              }

              if (strpos($value, ':') !== false) {
                  return self::T_ALIASED_NAME;
              }

              if (strpos($value, '\\') !== false) {
                  return self::T_FULLY_QUALIFIED_NAME;
              }

              return self::T_IDENTIFIER;
          }

          // Input parameters, introduced by "?" or ":".
          if ($value[0] === '?' || $value[0] === ':') {
              return self::T_INPUT_PARAMETER;
          }

          // Single-character symbols; any other lexeme has no specific type.
          $symbolTypes = [
              '.' => self::T_DOT,
              ',' => self::T_COMMA,
              '(' => self::T_OPEN_PARENTHESIS,
              ')' => self::T_CLOSE_PARENTHESIS,
              '=' => self::T_EQUALS,
              '>' => self::T_GREATER_THAN,
              '<' => self::T_LOWER_THAN,
              '+' => self::T_PLUS,
              '-' => self::T_MINUS,
              '*' => self::T_MULTIPLY,
              '/' => self::T_DIVIDE,
              '!' => self::T_NEGATE,
              '{' => self::T_OPEN_CURLY_BRACE,
              '}' => self::T_CLOSE_CURLY_BRACE,
          ];

          return $symbolTypes[$value] ?? self::T_NONE;
      }
  }
  193. The DQL Parser that uses the above Lexer implementation looks roughly
  194. like this:
  195. .. note::
  196. You can see the full implementation `here <https://github.com/doctrine/doctrine2/blob/master/lib/Doctrine/ORM/Query/Parser.php>`_.
  197. .. code-block:: php
  /**
   * Skeleton of the DQL parser: reads tokens from the Lexer and builds the AST.
   *
   * Sections marked "// ..." are elided from this excerpt.
   */
  class Parser
  {
      /** @var Lexer Token source for the DQL string being parsed. */
      private $lexer;

      /**
       * Creates a parser for the given DQL string.
       *
       * @param string $dql The DQL query to parse.
       */
      public function __construct($dql)
      {
          // The Lexer constructor requires the input string and calls
          // setInput() with it, so the DQL is passed straight to it.
          // "new Lexer()" without an argument fails with an ArgumentCountError.
          $this->lexer = new Lexer($dql);
      }

      // ...

      /**
       * Parses the DQL string and returns the resulting syntax tree.
       *
       * @return mixed The statement node produced by QueryLanguage().
       */
      public function getAST()
      {
          // Parse & build AST
          $AST = $this->QueryLanguage();

          // ...

          return $AST;
      }

      /**
       * Entry rule: picks the statement parser from the first token.
       *
       * @return mixed The node built by SelectStatement(), UpdateStatement()
       *               or DeleteStatement().
       */
      public function QueryLanguage()
      {
          // Load the first token into the lookahead slot.
          $this->lexer->moveNext();

          switch ($this->lexer->lookahead['type']) {
              case Lexer::T_SELECT:
                  $statement = $this->SelectStatement();
                  break;

              case Lexer::T_UPDATE:
                  $statement = $this->UpdateStatement();
                  break;

              case Lexer::T_DELETE:
                  $statement = $this->DeleteStatement();
                  break;

              default:
                  // NOTE(review): syntaxError() is among the elided methods and
                  // presumably throws; otherwise $statement is undefined below.
                  $this->syntaxError('SELECT, UPDATE or DELETE');
                  break;
          }

          // Check for end of string
          if ($this->lexer->lookahead !== null) {
              $this->syntaxError('end of string');
          }

          return $statement;
      }

      // ...
  }
  239. Now the AST is used to transform the DQL query into portable SQL for whatever relational
  240. database you are using!
  241. .. code-block:: php
  // Create a parser for a DQL string.
  $parser = new Parser('SELECT u FROM User u');

  // Returns \Doctrine\ORM\Query\AST\SelectStatement
  $AST = $parser->getAST();
  244. What is an AST?
  245. ===============
  246. AST stands for `Abstract syntax tree <http://en.wikipedia.org/wiki/Abstract_syntax_tree>`_.
  247. In computer science, an abstract syntax tree (AST), or just syntax tree, is a
  248. tree representation of the abstract syntactic structure of source code written
  249. in a programming language. Each node of the tree denotes a construct occurring in
  250. the source code.