-1

Hello, I have been trying to generate random data, including random dates, and write it to a CSV file, but I am getting the following error: expected str instance, numpy.datetime64 found

code for data generator

import pandas as pd
import numpy as np
import string
import random

def gen_random_email():
    """Build a random e-mail-like string together with its type label.

    The local part is 10 characters drawn with replacement from the ASCII
    letters plus '.', where '.' is listed five times in the pool. A '.'
    sitting directly before the '@' is removed.

    Returns:
        tuple: ``(address, "Email")``.
    """
    providers = ["hotmail.com", "gmail.com", "aol.com", "mail.com", "mail.kz", "yahoo.com"]
    alphabet = list(string.ascii_letters + '.' * 5)

    # Draw the local part first and the domain second so the random
    # generator is consumed in a fixed order.
    local_part = ''.join(np.random.choice(alphabet, 10))
    provider = np.random.choice(providers)

    address = (local_part + '@' + provider).replace('.@', '@')
    return address, "Email"

def gen_random_float():
    """Produce a random non-negative float as text, with its type label.

    A uniform sample from [0, 1) is multiplied by a random integer below
    2000, then truncated (not rounded) to a random number of decimal
    places between 0 and 7.

    Returns:
        tuple: ``(text, 'Float')`` where ``text`` is ``str`` of the value.
    """
    raw = np.random.random() * np.random.randint(2000)

    # One power of ten serves both the scale-up and the scale-down.
    scale = 10 ** np.random.randint(8)
    truncated = int(raw * scale) / scale

    return str(truncated), 'Float'

def gen_random_sentence():
    """Assemble a four-word sentence: noun, verb, adverb, adjective.

    One word is picked from each pool, in that order, and the words are
    joined with single spaces. The adjectives already carry the final '.'.

    Returns:
        tuple: ``(sentence, 'String')``.
    """
    word_pools = (
        ["puppy", "car", "rabbit", "girl", "monkey"],                          # nouns
        ["runs", "hits", "jumps", "drives", "barfs"],                          # verbs
        ["crazily", "dutifully", "foolishly", "merrily", "occasionally"],      # adverbs
        ["adorable.", "clueless.", "dirty.", "odd.", "stupid."],               # adjectives
    )

    def pick(pool):
        # Index with randrange so one draw is consumed per word.
        return pool[random.randrange(len(pool))]

    sentence = " ".join([pick(pool) for pool in word_pools])

    return sentence, 'String'

def gen_random_int():
    """Return a random integer below 1000000 as text, with its type label.

    Returns:
        tuple: ``(text, 'Int')`` where ``text`` is the decimal digits.
    """
    value = np.random.randint(1000000)
    return f"{value}", 'Int'

def gen_random_date():
    """Pick a random day from 2020-01-01 through 2020-01-30.

    Returns:
        tuple: ``(date, 'Date')``. Note that ``date`` is a
        ``numpy.datetime64`` value, not a ``str``; a caller that needs
        text has to convert it.
    """
    # Offsets 0..29 are added to the base date as whole days.
    day_offset = np.random.choice(np.arange(0, 30))
    return np.datetime64('2020-01-01') + day_offset, 'Date'

def gen_dataset(filename, size=5000):
    """Write a header plus ``size`` random "value,type" lines to ``filename``.

    For every line one generator is chosen at random and its returned
    pair is joined with ','. ``str.join`` requires every item to be a
    ``str``, so a generator returning anything else raises ``TypeError``.

    Args:
        filename: Path of the file to create or overwrite.
        size: Number of data lines to write after the header.
    """
    randomizers = [gen_random_email, gen_random_float, gen_random_int, gen_random_sentence, gen_random_date]
    with open(filename, 'w') as out:
        out.write("Text, Type\n")
        for _ in range(size):
            make_row = random.choice(randomizers)
            out.write(",".join(make_row()) + "\n")

# Generate the sample file with the default number of rows.
gen_dataset('dataaaa.csv')   
    
TypeError: sequence item 0: expected str instance, numpy.datetime64 found 
2
  • When posting a question about code that produces an Exception, always include the complete Traceback - copy and paste it then format it as code (select it and type ctrl-k) Commented Jun 22, 2020 at 2:41
  • Any chance you could reduce that to a minimal reproducible example? Commented Jun 22, 2020 at 2:43

1 Answer 1

0

First, catch the error and see what is causing it.

def gen_dataset(filename, size=5000):
    """Diagnostic version: print the row that makes ``join`` fail.

    Writes the header and then up to ``size`` random rows to ``filename``.
    When joining a row raises ``TypeError``, the row is printed and the
    exception is re-raised unchanged.
    """
    randomizers = [gen_random_email, gen_random_float, gen_random_int, gen_random_sentence,gen_random_date]
    with open(filename, 'w') as file:
        file.write("Text, Type\n")
        for _ in range(size):
            # Keep the generator's result in a name so it can be inspected.
            f = random.choice(randomizers)
            result = f()
            try:
                file.write(",".join(result)+"\n")
            except TypeError:
                # Show the offending value, then let the error propagate.
                print(result)
                raise

>>>
(numpy.datetime64('2020-01-09'), 'Date')
Traceback (most recent call last):
  File "C:\pyProjects\tmp.py", line 80, in <module>
    gen_dataset('dataaaa.csv')
  File "C:\pyProjects\tmp.py", line 75, in gen_dataset
    file.write(",".join(result)+"\n")
TypeError: sequence item 0: expected str instance, numpy.datetime64 found

Hmm, I wonder if join only accepts strings as arguments?

Yep, from the docs:

A TypeError will be raised if there are any non-string values in iterable, including bytes objects.

I wonder how I can turn a numpy datetime64 into a string. Searching for "numpy datetime64 to string" is productive: Convert numpy.datetime64 to string object in python

These work

>>> q = gen_random_date()[0]
>>> q
numpy.datetime64('2020-01-27')
>>> np.datetime_as_string(q)
'2020-01-27'
>>> q.astype(str)
'2020-01-27'
>>>

Then just modify the try/except.

def gen_dataset(filename, size=5000):
    """Write a header plus ``size`` random "value,type" lines to ``filename``.

    Each generator returns a ``(value, type_label)`` pair. If joining the
    pair raises ``TypeError`` because the value is not a ``str``, the
    value is converted with ``np.datetime_as_string`` and the join is
    retried.

    Args:
        filename: Path of the file to create or overwrite.
        size: Number of data lines to write after the header.
    """
    randomizers = [gen_random_email, gen_random_float, gen_random_int, gen_random_sentence, gen_random_date]
    with open(filename, 'w') as file:
        file.write("Text, Type\n")
        for _ in range(size):
            f = random.choice(randomizers)
            a, b = f()
            try:
                q = ",".join([a, b])
            except TypeError:
                # join only accepts str items; the numpy.datetime64 value
                # needs converting to text first.
                a = np.datetime_as_string(a)
                q = ",".join([a, b])
            # Append the newline after joining: putting "\n" inside the
            # joined list would emit "a,b,\n", i.e. a spurious empty
            # third column on every row.
            file.write(q + "\n")

Or simply preemptively make the first item a string.

def gen_dataset(filename, size=5000):
    """Write a header plus ``size`` random "value,type" lines to ``filename``.

    Each generator returns a ``(value, type_label)`` pair. The value is
    passed through ``str()`` up front so that non-string values (such as
    a ``numpy.datetime64``) can be joined.

    Args:
        filename: Path of the file to create or overwrite.
        size: Number of data lines to write after the header.
    """
    randomizers = [gen_random_email, gen_random_float, gen_random_int, gen_random_sentence, gen_random_date]
    with open(filename, 'w') as file:
        file.write("Text, Type\n")
        for _ in range(size):
            f = random.choice(randomizers)
            a, b = f()
            # Append the newline after joining: putting "\n" inside the
            # joined list would emit "a,b,\n", i.e. a spurious empty
            # third column on every row.
            q = ",".join([str(a), b]) + "\n"
            file.write(q)
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.