1

I'm trying to call a python script from php. The script is executable from command line as well as from php. However, when calling it from php, I get a KeyError that I do not get when executing it from cmd.

PHP script :

$tweets = json_encode($tweets, JSON_UNESCAPED_UNICODE);
$tweetPath = storage_path()."/app/tempTweet.json";
$tweetOpen = fopen($tweetPath, 'w');
fwrite($tweetOpen, $tweets);
fclose($tweetOpen);

$cmd = "python ../app/sentiment_analysis.py ";
$output = shell_exec($cmd); //better results by using shell_exec

Python script :

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys, os, json, nltk, re
from collections import Counter
import itertools
reload(sys)
sys.setdefaultencoding('utf-8')


IS_POSSIBLY_UNDETERMINED = True
CERTAINTY_RATE = 0.15


class Tweet():
tokens = [] # List of all the tokens
text = ''

def __init__(self, rawtweet):
    self.tokens = []
    self.text = ""
    self.preprocess(rawtweet)
    self.extract_features()

def preprocess(self, rawtweet):
    try:
        rawtweet = rawtweet.lower()
        rawtweet =  re.sub('\\n','', rawtweet) #gets rid of line breaks
        rawtweet =  re.sub('@\S*','AT_USER', rawtweet) #banalizes user references
        rawtweet =  re.sub('https?://\S*', 'URL ', rawtweet)
        rawtweet =  re.sub('www\S*', 'URL ', rawtweet) #banalizes links
        # self.text = ' \u'.join(tweet.split('\\u')) # attempt to treat emojis
        rawtweet =  re.sub("[/@'\\$`,\-#%&;.:=[{}()$0.""]", '', rawtweet)
        self.text = rawtweet
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        # print(exc_type, fname, exc_tb.tb_lineno)


def extract_features(self):

    tokens = [word for word in nltk.word_tokenize(self.text.decode('utf-8'))]

    n_grams = []
    dict_features = {}

    try:
        for t in tokens:
            n_grams.append(t)

        for t in range(len(tokens)-1): # Consecutive words
            n_grams.append('+'.join(sorted([tokens[t],tokens[t+1]]))) # Adds consecutive bigrams to n_grams


        for t in range(len(tokens)-2): # Two ahead
            n_grams.append('+'.join(sorted([tokens[t], tokens[t+2]])))

    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
        n_grams = []
    self.tokens = n_grams

def __del__(self):
    self.label = ''
    self.tokens = []
    self.text = ''

class Classifier():

global_dict = {}
features = {}
features_filename = ''
classifier_filename = ''

def __init__(self, **keyword_parameters):

    self.import_global_dict()

# Imports the previous information, or creates blank files and variables
def import_global_dict(self):
    self.features_filename = FEATURES_FILE
    self.classifier_filename = CLASSIFIER_FILE

    # Classifier file
    if not os.path.isfile(self.classifier_filename):
        f = open(self.classifier_filename, 'w').close()
    with open(self.classifier_filename, 'r') as f:
        p = f.read()
        if f:
            try:
                self.global_dict = Counter(json.loads(p))
            except Exception as e:
                self.global_dict = Counter(dict())
        f.close()

    # Insights file
    if not os.path.isfile(self.features_filename):
        f = open(self.features_filename, 'w').close()
    with open(self.features_filename, 'r') as f:
        p = f.read()
        if f:
            try:
                self.features = json.loads(p)
            except:
                self.features = dict()
        f.close()

def make_labels(self, tweets):
    self.global_dict = dict(self.global_dict)
    for k in tweets:
        t = Tweet(tweets[k]['content'])
        if len(t.tokens):
            output = self.label_prevision_for_tweet(t.tokens)
        if output:
            # print output
            label = output['label']
            ratio = output['ratio']

        tweets[k]['sentiment'] = {'label' : label, 'certainty' : ratio}

    return tweets

def label_prevision_for_tweet(self, tokens):
    try:
        case_positive = self.features['p(+)']
        case_negative = self.features['p(-)']
        prob_null_pos = 1000000*(1/ float((self.features['positive_tokens'] + self.features['total_tokens'])))
        prob_null_neg = 1000000*(1/ float((self.features['negative_tokens'] + self.features['total_tokens'])))

        tokens_dict = {} # Local dict to store the tweet's tokens

        for t in tokens:
            try: #If tokens exist in global_dict
                tokens_dict[t] = self.global_dict[t]
                case_positive *= 1000000*tokens_dict[t]['p(+)']
                case_negative *= 1000000*tokens_dict[t]['p(-)']

            except Exception as e: # Consider existence in dict as 0
                case_positive *= prob_null_pos
                case_negative *= prob_null_neg

        result = case_positive - case_negative
        # print result, prob_null_pos, prob_null_neg, case_negative, case_positive
        if result >= 0:
            label = 'positive'
        elif result < 0:
            label = 'negative'

        res_max = max(case_positive, case_negative)
        res_min = min(case_positive, case_negative)
        r = 1- res_min/float(res_max)
        ratio = '{:.2%}'.format(r)

        if (IS_POSSIBLY_UNDETERMINED and (r < CERTAINTY_RATE)):
            label = 'undetermined'

    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno, sys.exc_info())
        label = 'undetermined'
        ratio = 0


    results = {'label': label,'ratio': ratio}
    return results


if __name__ == '__main__':

CLASSIFIER_FILE = 'classifier_global.json'
FEATURES_FILE = 'features_global.json'
TWEET_FILE = '../storage/app/tempTweet.json'

tweets_file = unicode(TWEET_FILE, 'utf-8')
with open(tweets_file) as f:
    tweets = json.load(f)

# CLASSIFIER_FILE = str(sys.argv[2])
# FEATURES_FILE = str(sys.argv[3])
d = Classifier()


# print type(tweets)
# print tweets
labelled_tweets = d.make_labels(tweets)
print labelled_tweets

A KeyError is sent in label_prevision_for_tweet on casepositive. The return value that I get in php is KeyError('p(+)',)

9
  • 3
    that's such a trivial PHP script. why don't you do that bit in python as well Commented Jan 5, 2017 at 14:17
  • 3
    Please add the full error traceback to your question. Commented Jan 5, 2017 at 14:18
  • Don't rely on the path or working directory when executing scripts. Get the full path to the script like you do with the JSON file, use the full path to python... Commented Jan 5, 2017 at 14:32
  • @e4c5 I have other functions in that script Commented Jan 5, 2017 at 14:33
  • 1
    still an extremely messy way of doing things Commented Jan 5, 2017 at 14:36

1 Answer 1

1

Here:

        try:
            self.features = json.loads(p)
        except:
            self.features = dict()

if you fail to json.loads() your file content's (that might be empty, cf the lines just above this part) for whatever reason, you silently initialize self.features as an empty dict. No surpise you get a KeyError in this case.

The first thing to do would be to explicitely pass the absolute file(s) path to your Python script. Then, if the files are not found or if they don't contains valid json, immediatly raise an exception signaling the problem instead of trying to pretend everything's ok.

Also, your code seems to be quite a mess. Oh and you may want to learn and use Python's standard logging package which let you log exceptions in a much simpler way - or just let the exceptions propagate FWIW, it's definitly the simpler way to both make sure your code wont try to work on unexpected condition and have an accurate diagnostic of what went wrong and where the problem happened (you still get a bonus point for at least trying to print out exceptions... even if the hard way and on the wrong output).

Sign up to request clarification or add additional context in comments.

2 Comments

Thank you so much, I'm really new to Python and kinda 'pissed code' as we say in French ( ;) ). Anyways, thank you for the help
You're welcome. And if you're new to Python, really consider my advice: in Python, most of the time, no error handling is the best error handling unless you really can fix the error. Actually even if some unexpected condition happens that does not crash your program by itself (but would lead to incorrect results or so), your safest bet is probably to make it crash right now by raising some exception with as much contextual informations as possible.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.