;;; ;;; Emacs code for displaying and labelling Tokens in context ;;; ;;; Need full default analysis (from lexicon), and maybe dynamic ;;; analysis (setq toklab-mode-map (make-keymap)) (suppress-keymap toklab-mode-map) ;(defun (toklab-mode-from-command arg) ; (interactive "p") ; (toklab-mode)) (defun toklab-mode () "Mode for labelling tokens. Letters will overwrite column one and skip to beginning of next line. The currently bound keys are SPACE accept given guess u move up one line m MSPL misspelled word geogaphy e EXPN abbreviation/contraction adv, N.Y, ft l LSEQ letter sequence CIA, D.C. a ASWD read as word proper names, words not in lex etc. f FNSP funny spelling slllooowwww . OTHER something requiring further analysis, or unknown x NONE ignore it formating junk, ascii art Number based tokens n NUM number (cardinal) 12, 45 t NTEL telephone (or part of) 212 555-4523 z NZIP zip code (PO Box) 91020 d NDIG number as digits 747, 386 o NORD number as ordinal (date) May 7, 3rd, Bill Gates III i NIDE identified (?) c NTIME a (compound) time 3.20 12:15 C NDATE a (compound) date 2/2/99 u URL email/pathname/url http://slashdot.org /usr/local y NYER a year (or years) 1998 80s 1900s 2003 , NADDR a street address number 5000 Pennsylvania, 4245 Forbes Ave $ MONEY money (US or otherwise) $3.45 b BMONY money followed by [mb]illions $3 billion r enter your own label in the minibuffer / offer token to be split into segments " (interactive "p") (kill-all-local-variables) (setq major-mode 'toklab-mode) (setq mode-name "TokLab") (setq truncate-lines t) (setq scroll-step 32) (use-local-map toklab-mode-map) (make-local-variable 'local-object)) (define-key toklab-mode-map "u" 'previous-line) (defun toklab-define-key (key val) "Add function binding so ket inserts val at beginning of line." (let ((name (intern (format "toklab-insert-%s" val)))) (eval (list 'defun name '(arg) '(interactive "p") '(beginning-of-line) (list 'insert-string val) (list 'delete-char (length val)) '(forward-line) '(start-of-nsw))) (define-key toklab-mode-map key name))) (defun toklab-asis (arg) (interactive "p") (let (ee ss val) (beginning-of-line) (if (not (string-equal " " (char-to-string (char-after (point)))) ) ;; theres a tag at the start already (forward-token 2) (forward-token 1)) (setq ee (point)) (backward-token 1) (setq ss (point)) (setq val (buffer-substring ss ee)) (beginning-of-line) (insert-string val) (delete-char (length val)) (forward-line) (start-of-nsw))) (defun forward-token (n) (interactive "p") (cond ((<= n 0) nil) (t (search-forward-regexp "[^ ]") (backward-char 1) (search-forward " ") (backward-char 1) (forward-token (- n 1))))) (defun backward-token (n) (interactive "p") (cond ((<= n 0) nil) (t (search-backward-regexp "[^ ]") (forward-char 1) (search-backward " ") (forward-char 1) (backward-token (- n 1))))) (defun start-of-nsw () (interactive "p") (beginning-of-line) (setq ok_pos (point)) (end-of-line) (setq end_limit (point)) (beginning-of-line) (if (< end_limit (point-max)) (progn (if (not (string-equal " " (char-to-string (char-after (point)))) ) ;; theres a tag at the start already (forward-token 2) (forward-token 1)) (forward-token 6) (backward-token 1) (if (> (point) end_limit) (goto-char ok_pos))))) (defun toklab-readinfo (arg) (interactive "p") (let (ee ss val) (beginning-of-line) (if (not (string-equal " " (char-to-string (char-after (point)))) ) ;; theres a tag at the start already (forward-token 2) (forward-token 1)) (setq ee (point)) (backward-token 1) (setq ss (point)) (beginning-of-line) (setq val (read-from-minibuffer "other label: ")) (beginning-of-line) (insert-string val) (if (> (length val) 5) (delete-char 5) (delete-char (length val))) (forward-line) (start-of-nsw))) (defun insert-subsegments (segs) (interactive "p") (cond ((null segs) nil) (t (end-of-line) (newline) (insert-string " ") (insert-string (car segs)) (beginning-of-line) (insert-subsegments (cdr segs))))) (defun toklab-splittoken (arg) (interactive "p") (let (ee ss val) (beginning-of-line) (if (not (string-equal " " (char-to-string (char-after (point)))) ) ;; theres a tag at the start already (forward-word 2) (forward-word 1)) ;; Need to move forward to find token that is to be labelled (forward-token 6) (setq ee (point)) (backward-token 1) (setq ss (point)) (setq token (buffer-substring ss ee)) (setq segments (read-from-minibuffer "add spaces between segments: " token)) ;; Split and insert new segments (beginning-of-line) (insert-subsegments (tok-split-string segments)) (goto-char ss) (beginning-of-line) (insert-string "SPLT ") (delete-char (length "SPLT ")) (forward-line) (beginning-of-line))) ;; Emacs 19 doesn't have split-string so have to write on myself ;; (defun tok-split-string (str) ;; splits the string into substrings at white space (let ((subs nil) (i 0) (st 0) (ln 0)) (while (< i (length str)) (cond ((string-match "[ \f\t\n\r\v]" (substring str i (+ i 1))) (if (> ln 0) (setq subs (cons (substring str st (+ st ln)) subs))) (setq ln 0) (setq st (+ 1 i))) (t (setq ln (+ 1 ln)))) (setq i (+ 1 i))) (if (> ln 0) (setq subs (cons (substring str st (+ st ln)) subs))) (reverse subs))) (define-key toklab-mode-map " " 'toklab-asis) (define-key toklab-mode-map "r" 'toklab-readinfo) (define-key toklab-mode-map "/" 'toklab-splittoken) ;; space estimate is right ;; enter new definition char string (?) ;(toklab-define-key "p" "PLU ") ;; plural (toklab-define-key "m" "MSPL ") ;; misspelled word (toklab-define-key "e" "EXPN ") ;; expand abbreviation (toklab-define-key "l" "LSEQ ") ;; letter sequence (toklab-define-key "a" "ASWD ") ;; read as word (toklab-define-key "f" "FNSP ") ;; funny spelling (capitalization) (toklab-define-key "x" "NONE ") ;; for wrong identification of tag (toklab-define-key "." "OTHER") ;; something that should be relabelled ;(toklab-define-key "w" "WORD ") ;; for wrong identification of tag ;(toklab-define-key "?" "TRASH") ;; others (toklab-define-key "t" "NTEL ") ;; telephone number (toklab-define-key "z" "NZIP ") ;; zip code (toklab-define-key "d" "NDIG ") ;; number as digits (toklab-define-key "n" "NUM ") ;; number as ammount (toklab-define-key "o" "NORD ") ;; number as ordinal (toklab-define-key "i" "NIDE ") ;; number as identifier (toklab-define-key "c" "NTIME") ;; compound time (toklab-define-key "C" "NDATE") ;; compound data (toklab-define-key "u" "URL ") ;; email/newsgroup/url (toklab-define-key "y" "NYER ") ;; number as year (toklab-define-key "$" "MONEY") ;; money (toklab-define-key "b" "BMONY") ;; money (with .*illion after it) (toklab-define-key "%" "PRCT ") ;; percentage (toklab-define-key "s" "SLNT ") ;; silent (toklab-define-key "S" "SCORE") ;; x-x (toklab-define-key "," "NADDR") ;; silent ;(toklab-define-key "X" "LEXN ") ;; token is in NSW lexicon ;(setq auto-mode-alist ; (append '(("\\.feats$" . toklab-mode)) auto-mode-alist))