phpython Code
A Python interpreter written in PHP
Status: Pre-Alpha
Brought to you by:
francescobianco
--- a/trunk/python/python-tokenizer.php +++ b/trunk/python/python-tokenizer.php @@ -102,6 +102,8 @@ --> <?php + $python_tokens = array("for","in"); + function group($re) { if (is_array($re)) { return "(".implode("|",$re).")"; @@ -115,6 +117,12 @@ function maybe($re) { return group($re)."?"; } + function token($name,$value=null) { + return array( + "name" => $name, + "value" => $value + ); + } $preg_whitespace = "[ \f\t]*"; $preg_comment = "#[^\r\n]*"; @@ -122,36 +130,61 @@ $preg_name = "[a-zA-Z_]\w*"; function python_tokenize($pycode) { + global $python_tokens; + $line = $pycode; + $indent = 0; + $out = array(); while (strlen($line)) { - if (preg_match('/^[ \t]+(\.[0-9]*)?/', $line, $regs)) { + if (preg_match('/^[ ]+/', $line, $regs)) { # ignored #$out[] = $regs[0]; $line = substr($line, strlen($regs[0])); - + + } else if (preg_match('/^[\t]+/', $line, $regs)) { + # indent/dedent + #$out[] = $regs[0]; + $d = strlen($regs[0]) - $indent; + switch($d) { + case 0: break; + case +1: $out[]=token("INDENT"); break; + case -1: $out[]=token("DEDENT"); break; + default: die("Expectend indented block"); + } + $line = substr($line, strlen($regs[0])); + + } else if (preg_match('/^:/', $line, $regs)) { + # newline + $out[] = token("COLON"); + $line = substr($line, strlen($regs[0])); + } else if (preg_match('/^\n/', $line, $regs)) { # newline - $out[] = "__NEWLINE__"; + $out[] = token("NEWLINE"); $line = substr($line, strlen($regs[0])); } else if (preg_match('/^[0-9]+(\.[0-9]*)?/', $line, $regs)) { # number - $out[] = $regs[0]; + $out[] = token("NUMBER",$regs[0]); $line = substr($line, strlen($regs[0])); - } else if (preg_match('/^"([^"]*)"/', $line, $regs)) { + } else if (preg_match('/^"([^"]*((\\\\")*[^"]*))"/', $line, $regs)) { # double quoted string #var_dump($regs); - $out[] = $regs[1]; + $out[] = token("DSTRING",$regs[1]); $line = substr($line, strlen($regs[0])); } else if (preg_match('/^[A-Za-z_][A-Za-z0-1_]*/', $line, $regs)) { - # name - $out[] = $regs[0]; + # name or 
litteral-token + if (in_array($regs[0],$python_tokens)) { + $out[] = token(strtoupper($regs[0])); + } else { + $out[] = token("NAME",$regs[0]); + } $line = substr($line, strlen($regs[0])); - + } else { # rest $out[] = $line[0];