Initial commit

2017-11-11 15:15:10 -02:00
commit 58c3bd6728
1202 changed files with 434097 additions and 0 deletions
--- a/elpa/haskell-mode-20171022.26/haskell-lexeme.el
+++ b/elpa/haskell-mode-20171022.26/haskell-lexeme.el
@@ -0,0 +1,513 @@
+;;; haskell-lexeme.el --- haskell lexical tokens   -*- coding: utf-8; lexical-binding: t -*-
+
+;; Copyright (C) 2015 Gracjan Polak
+
+;; This file is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; This file is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;;; Code:
+
+(require 'rx)
+
+(unless (category-docstring ?P)
+  (define-category ?P "Haskell symbol constituent characters")
+  (map-char-table
+   #'(lambda (key val)
+       (if (or
+            (and (consp key) (> (car key) 128))
+            (and (numberp key) (> key 128)))
+           (if (member val '(Pc Pd Po Sm Sc Sk So))
+               (modify-category-entry key ?P))))
+   unicode-category-table)
+
+  (dolist (key (string-to-list "!#$%&*+./<=>?@^|~\\-:"))
+    (modify-category-entry key ?P)))
+
+(defconst haskell-lexeme-modid
+  "[[:upper:]][[:alnum:]'_]*"
+  "Regexp matching a valid Haskell module identifier.
+
+Note that GHC accepts Unicode category UppercaseLetter as a first
+character. Following letters are from Unicode categories
+UppercaseLetter, LowercaseLetter, OtherLetter, TitlecaseLetter,
+ModifierLetter, DecimalNumber, OtherNumber, backslash or
+underscore.")
+
+(defconst haskell-lexeme-id
+  "[[:alpha:]_][[:alnum:]'_]*"
+  "Regexp matching a valid Haskell identifier.
+
+GHC accepts a string starting with any alphabetic character or
+underscore followed by any alphanumeric character or underscore
+or apostrophe.")
+
+(defconst haskell-lexeme-sym
+  "\\cP+"
+  "Regexp matching a valid Haskell variable or constructor symbol.
+
+GHC accepts a string of chars from the set
+[:!#$%&*+./<=>?@^|~\\-] or Unicode category Symbol for chars with
+codes larger than 128 only.")
+
+(defconst haskell-lexeme-idsym-first-char
+  "\\(?:[[:alpha:]_]\\|\\cP\\)"
+  "Regexp matching first character of a qualified or unqualified
+identifier or symbol.
+
+Useful for `re-search-forward'.")
+
+(defconst haskell-lexeme-modid-opt-prefix
+  (concat "\\(?:" haskell-lexeme-modid "\\.\\)*")
+  "Regexp matching a valid Haskell module prefix, potentially empty.
+
+Module path prefix is separated by dots and finishes with a
+dot. For path component syntax see `haskell-lexeme-modid'.")
+
+(defconst haskell-lexeme-qid-or-qsym
+  (rx-to-string `(: (regexp ,haskell-lexeme-modid-opt-prefix)
+                    (group (| (regexp ,haskell-lexeme-id) (regexp ,haskell-lexeme-sym)
+                              ))))
+  "Regexp matching a valid qualified identifier or symbol.
+
+Note that (match-string 1) returns the unqualified part.")
+
+(defun haskell-lexeme-looking-at-qidsym ()
+  "Non-nil when point is just in front of an optionally qualified
+identifier or symbol.
+
+Using this function is more efficient than matching against the
+regexp `haskell-lexeme-qid-or-qsym'.
+
+Returns:
+  'qid - if matched a qualified id: 'Data.Map' or 'Map'
+  'qsym - if matched a qualified id: 'Monad.>>=' or '>>='
+  'qprefix - if matched only modid prefix: 'Data.'
+
+After successful 'qid or 'qsym match (match-string 1) will return
+the unqualified part (if any)."
+  (let ((begin (point))
+        (match-data-old (match-data)))
+    (save-excursion
+      (while (looking-at (concat haskell-lexeme-modid "\\."))
+        (goto-char (match-end 0)))
+      (cond
+       ((looking-at haskell-lexeme-id)
+        (let ((beg (match-beginning 0))
+              (end (match-end 0)))
+
+          ;; check is MagicHash is present at the end of the token
+          (goto-char end)
+          (when (looking-at "#+")
+            (setq end (match-end 0)))
+
+          (set-match-data
+           (list begin end
+                 beg end)))
+        'qid)
+       ((looking-at haskell-lexeme-sym)
+        (set-match-data
+          (list begin (match-end 0)
+                (match-beginning 0) (match-end 0)))
+        'qsym)
+       ((equal begin (point))
+        (set-match-data match-data-old)
+        nil)
+       (t
+        (set-match-data
+         (list begin (point)
+               nil nil))
+        'qprefix)))))
+
+(defun haskell-lexeme-looking-at-backtick ()
+  "Non-nil when point is just in front of an identifier quoted with backticks.
+
+When match is successful, match-data will contain:
+  (match-text 1) - opening backtick
+  (match-text 2) - whole qualified identifier
+  (match-text 3) - unqualified part of identifier
+  (match-text 4) - closing backtick"
+  (let ((match-data-old (match-data))
+        first-backtick-start
+        last-backtick-start
+        qid-start
+        id-start
+        id-end
+        result)
+    (save-excursion
+      (when (looking-at "`")
+        (setq first-backtick-start (match-beginning 0))
+        (goto-char (match-end 0))
+        (forward-comment (buffer-size))
+        (when (haskell-lexeme-looking-at-qidsym)
+          (setq qid-start (match-beginning 0))
+          (setq id-start (match-beginning 1))
+          (setq id-end (match-end 1))
+          (goto-char (match-end 0))
+          (forward-comment (buffer-size))
+          (when (looking-at "`")
+            (setq last-backtick-start (match-beginning 0))
+            (set-match-data
+             (mapcar
+              (lambda (p)
+                (set-marker (make-marker) p))
+              (list
+               first-backtick-start (1+ last-backtick-start)
+               first-backtick-start (1+ first-backtick-start)
+               qid-start id-end
+               id-start id-end
+               last-backtick-start (1+ last-backtick-start))))
+            (setq result t)))))
+    (unless result
+      (set-match-data match-data-old))
+    result))
+
+(defconst haskell-lexeme-qid
+  (rx-to-string `(: (regexp "'*")
+                    (regexp ,haskell-lexeme-modid-opt-prefix)
+                    (group (regexp ,haskell-lexeme-id))))
+  "Regexp matching a valid qualified identifier.
+
+Note that (match-string 1) returns the unqualified part.")
+
+(defconst haskell-lexeme-qsym
+  (rx-to-string `(: (regexp "'*")
+                    (regexp ,haskell-lexeme-modid-opt-prefix)
+                    (group (regexp ,haskell-lexeme-id))))
+  "Regexp matching a valid qualified symbol.
+
+Note that (match-string 1) returns the unqualified part.")
+
+(defconst haskell-lexeme-number
+  (rx (| (: (regexp "[0-9]+\\.[0-9]+") (opt (regexp "[eE][-+]?[0-9]+")))
+         (regexp "[0-9]+[eE][-+]?[0-9]+")
+         (regexp "0[xX][0-9a-fA-F]+")
+         (regexp "0[oO][0-7]+")
+         (regexp "[0-9]+")))
+  "Regexp matching a floating point, decimal, octal or hexadecimal number.
+
+Note that negative sign char is not part of a number.")
+
+(defconst haskell-lexeme-char-literal-inside
+  (rx (| (not (any "\n'\\"))
+         (: "\\"
+            (| "a" "b" "f" "n" "r" "t" "v" "\\" "\"" "'"
+               "NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK"
+               "BEL" "BS" "HT" "LF" "VT" "FF" "CR" "SO" "SI" "DLE"
+               "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB" "CAN"
+               "EM" "SUB" "ESC" "FS" "GS" "RS" "US" "SP" "DEL"
+               (regexp "[0-9]+")
+               (: "x" (regexp "[0-9a-fA-F]+"))
+               (: "o" (regexp "[0-7]+"))
+               (: "^" (regexp "[]A-Z@^_\\[]"))))))
+  "Regexp matching an inside of a character literal.
+
+Note that `haskell-lexeme-char-literal-inside' matches strictly
+only escape sequences defined in Haskell Report.")
+
+(defconst haskell-lexeme--char-literal-rx
+  (rx-to-string `(: (group "'")
+                    (| (: (group (regexp "[[:alpha:]_([]")) (group "'")) ; exactly one char
+                       (: (group (| (regexp "\\\\[^\n][^'\n]*") ; allow quote just after first backslash
+                                    (regexp "[^[:alpha:]_(['\n][^'\n]*")))
+                          (| (group "'") "\n" (regexp "\\'"))))))
+  "Regexp matching a character literal lookalike.
+
+Note that `haskell-lexeme--char-literal-rx' matches more than
+Haskell Report specifies because we want to support also code
+under edit.
+
+Character literals end with a quote or a newline or end of
+buffer.
+
+Regexp has subgroup expressions:
+ (match-text 1) matches the opening quote.
+ (match-text 2) matches the inside of the character literal.
+ (match-text 3) matches the closing quote or an empty string
+                at the end of line or the end buffer.")
+
+(defun haskell-lexeme-looking-at-char-literal ()
+  "Non-nil when point is at a char literal lookalike.
+
+Note that this function matches more than Haskell Report
+specifies because we want to support also code under edit.
+
+Char literals end with a quote or an unescaped newline or end
+of buffer.
+
+After successful match:
+ (match-text 1) matches the opening quote.
+ (match-text 2) matches the inside of the char literla.
+ (match-text 3) matches the closing quote, or a closing
+                newline or is nil when at the end of the buffer."
+  (when (looking-at haskell-lexeme--char-literal-rx)
+    (set-match-data
+     (list (match-beginning 0) (match-end 0)
+           (match-beginning 1) (match-end 1)
+           (or (match-beginning 2) (match-beginning 4)) (or (match-end 2) (match-end 4))
+           (or (match-beginning 3) (match-beginning 5)) (or (match-end 3) (match-end 5))))
+    t))
+
+(defconst haskell-lexeme-string-literal-inside-item
+  (rx (| (not (any "\n\"\\"))
+         (: "\\"
+            (| "a" "b" "f" "n" "r" "t" "v" "\\" "\"" "'" "&"
+               "NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK"
+               "BEL" "BS" "HT" "LF" "VT" "FF" "CR" "SO" "SI" "DLE"
+               "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB" "CAN"
+               "EM" "SUB" "ESC" "FS" "GS" "RS" "US" "SP" "DEL"
+               (regexp "[0-9]+")
+               (: "x" (regexp "[0-9a-fA-F]+"))
+               (: "o" (regexp "[0-7]+"))
+               (: "^" (regexp "[]A-Z@^_\\[]"))
+               (regexp "[ \t\n\r\v\f]*\\\\")))))
+  "Regexp matching an item that is a single character or a single
+escape sequence inside of a string literal.
+
+Note that `haskell-lexeme-string-literal-inside-item' matches
+strictly only escape sequences defined in Haskell Report.")
+
+(defconst haskell-lexeme-string-literal
+  (rx (: (group "\"")
+         (group (* (| (regexp "\\\\[ \t\n\r\v\f]*\\\\")
+                      (regexp "\\\\[ \t\n\r\v\f]+")
+                      (regexp "\\\\[^ \t\n\r\v\f]")
+                      (* (regexp "[^\"\n\\]")))))
+         (group (| "\"" (regexp "$") (regexp "\\\\?\\'")
+                   ))))
+  "Regexp matching a string literal lookalike.
+
+Note that `haskell-lexeme-string-literal' matches more than
+Haskell Report specifies because we want to support also code
+under edit.
+
+String literals end with double quote or unescaped newline or end
+of buffer.
+
+Regexp has subgroup expressions:
+ (match-text 1) matches the opening double quote.
+ (match-text 2) matches the inside of the string.
+ (match-text 3) matches the closing double quote or an empty string
+                at the end of line or the end buffer.")
+
+(defun haskell-lexeme-looking-at-string-literal ()
+  "Non-nil when point is at a string literal lookalike.
+
+Note that this function matches more than Haskell Report
+specifies because we want to support also code under edit.
+
+String literals end with double quote or unescaped newline or end
+of buffer.
+
+After successful match:
+ (match-text 1) matches the opening doublequote.
+ (match-text 2) matches the inside of the string.
+ (match-text 3) matches the closing quote, or a closing
+                newline or is nil when at the end of the buffer."
+  (when (looking-at "\"")
+    (save-excursion
+      (let ((begin (point)))
+        (goto-char (match-end 0))
+        (let (finish)
+          (while (and (not finish)
+                      (re-search-forward "[\"\n\\]" nil 'goto-eob))
+            (cond
+             ((equal (match-string 0) "\\")
+              (if (looking-at "[ \t\n\r\v\f]+\\\\?")
+                  (goto-char (match-end 0))
+                (goto-char (1+ (point)))))
+
+             ((equal (match-string 0) "\"")
+              (set-match-data
+               (list begin (match-end 0)
+                     begin (1+ begin)
+                     (1+ begin) (match-beginning 0)
+                     (match-beginning 0) (match-end 0)))
+              (setq finish t))
+
+             ((equal (match-string 0) "\n")
+              (set-match-data
+               (list begin (match-beginning 0)
+                     begin (1+ begin)
+                     (1+ begin) (match-beginning 0)
+                     nil nil))
+              (setq finish t))))
+          (unless finish
+            ;; string closed by end of buffer
+            (set-match-data
+             (list begin (point)
+                   begin (1+ begin)
+                   (1+ begin) (point)
+                   nil nil))))))
+    ;; there was a match
+    t))
+
+(defun haskell-lexeme-looking-at-quasi-quote-literal ()
+  "Non-nil when point is just in front of Template Haskell
+quaisquote literal.
+
+Quasi quotes start with '[xxx|' or '[$xxx|' sequence and end with
+  '|]'. The 'xxx' is a quoter name. There is no escaping mechanism
+provided for the ending sequence.
+
+Regexp has subgroup expressions:
+ (match-text 1) matches the quoter name (without $ sign if present).
+ (match-text 2) matches the opening vertical bar.
+ (match-text 3) matches the inside of the quoted string.
+ (match-text 4) matches the closing vertical bar
+                or nil if at the end of the buffer.
+
+Note that this function excludes 'e', 't', 'd', 'p' as quoter
+names according to Template Haskell specification."
+  (let ((match-data-old (match-data)))
+    (if (and
+         (looking-at (rx-to-string `(: "[" (optional "$")
+                                       (group (regexp ,haskell-lexeme-id))
+                                       (group "|"))))
+         (equal (haskell-lexeme-classify-by-first-char (char-after (match-beginning 1)))
+                'varid)
+         (not (member (match-string 1) '("e" "t" "d" "p"))))
+      (save-excursion
+        ;; note that quasi quote syntax does not have any escaping
+        ;; mechanism and if not closed it will span til lthe end of buffer
+        (goto-char (match-end 0))
+        (let ((match-data (match-data))
+              (match-data-2 (and (re-search-forward "|]" nil t)
+                                 (match-data))))
+          (if match-data-2
+              (set-match-data
+               (list
+                (nth 0 match-data) (nth 1 match-data-2)          ;; whole match
+                (nth 2 match-data) (nth 3 match-data)            ;; quoter name
+                (nth 4 match-data) (nth 5 match-data)            ;; opening bar
+                (nth 5 match-data) (nth 0 match-data-2)          ;; inner string
+                (nth 0 match-data-2) (1+ (nth 0 match-data-2)))) ;; closing bar
+
+            (set-match-data
+             (list
+              (nth 0 match-data) (point-max)                   ;; whole match
+              (nth 2 match-data) (nth 3 match-data)            ;; quoter name
+              (nth 4 match-data) (nth 5 match-data)            ;; opening bar
+              (nth 5 match-data) (point-max)                   ;; inner string
+              nil nil))                                        ;; closing bar
+            ))
+        t)
+      ;; restore old match data if not matched
+      (set-match-data match-data-old)
+      nil)))
+
+(defun haskell-lexeme-classify-by-first-char (char)
+  "Classify token by CHAR.
+
+CHAR is a chararacter that is assumed to be the first character
+of a token."
+  (let ((category (get-char-code-property (or char ?\ ) 'general-category)))
+
+    (cond
+     ((or (member char '(?! ?# ?$ ?% ?& ?* ?+ ?. ?/ ?< ?= ?> ?? ?@ ?^ ?| ?~ ?\\ ?-))
+          (and (> char 127)
+               (member category '(Pc Pd Po Sm Sc Sk So))))
+      'varsym)
+     ((equal char ?:)
+      'consym)
+     ((equal char ?\')
+      'char)
+     ((equal char ?\")
+      'string)
+     ((member category '(Lu Lt))
+      'conid)
+     ((or (equal char ?_)
+          (member category '(Ll Lo)))
+      'varid)
+     ((and (>= char ?0) (<= char ?9))
+      'number)
+     ((member char '(?\] ?\[ ?\( ?\) ?\{ ?\} ?\` ?\, ?\;))
+      'special))))
+
+(defun haskell-lexeme-looking-at-token (&rest flags)
+  "Like `looking-at' but understands Haskell lexemes.
+
+Moves point forward over whitespace.  Returns a symbol describing
+type of Haskell token recognized.  Use `match-string',
+`match-beginning' and `match-end' with argument 0 to query match
+result.
+
+Possible results are:
+- 'special: for chars [](){}`,;
+- 'comment: for single line comments
+- 'nested-comment: for multiline comments
+- 'qsymid: for qualified identifiers or symbols
+- 'string: for strings literals
+- 'char: for char literals
+- 'number: for decimal, float, hexadecimal and octal number literals
+- 'template-haskell-quote: for a string of apostrophes for template haskell
+- 'template-haskell-quasi-quote: for a string of apostrophes for template haskell
+
+Note that for qualified symbols (match-string 1) returns the
+unqualified identifier or symbol.  Further qualification for
+symbol or identifier can be done with:
+
+   (haskell-lexeme-classify-by-first-char (char-after (match-beginning 1)))
+
+See `haskell-lexeme-classify-by-first-char' for details."
+  (while
+      ;; Due to how unterminated strings terminate at newline, some
+      ;; newlines have syntax set to generic string delimeter. We want
+      ;; those to be treated as whitespace anyway
+      (or
+       (> (skip-syntax-forward "-") 0)
+       (and (not (member 'newline flags))
+            (> (skip-chars-forward "\n") 0))))
+  (let
+      ((case-fold-search nil)
+       (point (point-marker)))
+    (or
+     (and
+      (equal (string-to-syntax "<")
+             (get-char-property (point) 'syntax-table))
+      (progn
+        (set-match-data (list point (set-marker (make-marker) (line-end-position))))
+        'literate-comment))
+     (and (looking-at "\n")
+          'newline)
+     (and (looking-at "{-")
+          (save-excursion
+            (forward-comment 1)
+            (set-match-data (list point (point-marker)))
+            'nested-comment))
+     (and (haskell-lexeme-looking-at-char-literal)
+          'char)
+     (and (haskell-lexeme-looking-at-string-literal)
+          'string)
+     (and (looking-at "[][(){}`,;]")
+          (if (haskell-lexeme-looking-at-quasi-quote-literal)
+              'template-haskell-quasi-quote
+            'special))
+     (and (haskell-lexeme-looking-at-qidsym)
+          (if (save-match-data
+                (string-match "\\`---*\\'" (match-string-no-properties 0)))
+              (progn
+                (set-match-data (list point (set-marker (make-marker) (line-end-position))))
+                'comment)
+            'qsymid))
+     (and (looking-at haskell-lexeme-number)
+          'number)
+     (and (looking-at "'+")
+          'template-haskell-quote)
+     (and (looking-at ".")
+          'illegal))))
+
+(provide 'haskell-lexeme)
+
+;;; haskell-lexeme.el ends here