4

An equation takes values in the following form:

   x = [0x02,0x00]  # later converted internally, by the called function, to 0x300
   y = [0x01, 0xFF]
   z = [0x01, 0x0F]

How do I generate a series of test values for this function? For instance, I want to send a hundred-odd values from a for loop:

for i in range(0,300):
   # a and b are meant to be derived from a range of values (not shown here)
   x = [a,b]

My question was a bit unclear, so please let me clarify. What I wanted to ask is how, given x = [a, b], I can generate different values for a and b.

3 Answers 3

4

use generators:

def gen_xyz( max_iteration ):
    """Yield one ( x, y, z ) tuple per iteration, max_iteration times.

    This is a skeleton: the code that computes x, y and z for each step is
    left as a placeholder and must be filled in by the reader.
    """
    # xrange is the Python 2 lazy range; on Python 3 use range instead.
    for i in xrange( 0, max_iteration ):
       # code which will generate next ( x, y, z )
       yield ( x, y, z ) 

# Consume the generator: each yielded tuple is unpacked and passed to f,
# the function under test (defined elsewhere).
for x, y, z in gen_xyz( 1000 ):
  f( x, y, z )
Sign up to request clarification or add additional context in comments.

Comments

2

The hex() function?

import random
# Build ten random two-element lists and print each one.
for i in range(10):
    # randint includes both endpoints, so each value is in 1..100.
    a1, a2 = random.randint(1,100), random.randint(1,100)
    # hex() returns a string such as '0x21', not an integer.
    x = [hex(a1), hex(a2)]
    # Python 2 print statement; on Python 3 this would be print(x).
    print x

..outputs something similar to..

['0x21', '0x4f']
['0x59', '0x5c']
['0x61', '0x40']
['0x57', '0x45']
['0x1a', '0x11']
['0x4c', '0x49']
['0x40', '0x1b']
['0x1f', '0x7']
['0x8', '0x2b']
['0x1e', '0x13']

Comments

-3
import pandas as pd
import numpy as np
import random
from datetime import timedelta
import re
import os

# Function to generate unique random integers within the range of a column in the original dataframe
def generate_unique_random_numbers(df_new, df_original, num_cols, num_rows):
    """Fill the given numeric columns of ``df_new`` with random integers.

    For each column in ``num_cols`` the integers are drawn from the closed
    range between the column's minimum and maximum in ``df_original`` (both
    truncated with ``int``). The values are unique whenever that range
    contains at least ``num_rows`` integers; otherwise they are sampled with
    replacement, so duplicates can occur.

    A column that has no non-null values in ``df_original`` offers no range
    to draw from, so it is skipped and keeps whatever ``df_new`` already
    holds, instead of failing on ``int(NaN)``.

    Args:
        df_new: DataFrame to fill; it is modified in place.
        df_original: DataFrame supplying the min/max of each column.
        num_cols: Names of the columns to fill.
        num_rows: Number of values to generate per column; presumably equal
            to ``len(df_new)`` -- confirm against the caller.

    Returns:
        The same ``df_new`` object that was passed in.
    """
    for col in num_cols:
        observed = df_original[col].dropna()
        if observed.empty:
            # Empty or all-NaN column: min()/max() would be NaN and int()
            # would raise, so leave this column untouched.
            continue

        min_val = int(observed.min())
        max_val = int(observed.max())
        candidates = range(min_val, max_val + 1)

        if max_val - min_val + 1 >= num_rows:
            # Enough distinct integers: sample without replacement.
            df_new[col] = random.sample(candidates, num_rows)
        else:
            # Too few distinct integers: fall back to sampling with replacement.
            df_new[col] = random.choices(candidates, k=num_rows)
    return df_new

# Function to generate random dates within the date range of a column in the original dataframe
def generate_random_dates(df_new, df_original, date_cols, num_rows):
    """Fill the given date columns of ``df_new``.

    A column whose dtype in ``df_original`` is a datetime64 type receives
    ``num_rows`` random dates, each being the column's minimum plus a random
    whole number of days no greater than the span between minimum and
    maximum. Any other column is filled with ``num_rows`` copies of its
    first value in ``df_original``.

    Args:
        df_new: DataFrame to fill; it is modified in place.
        df_original: DataFrame supplying the date range or constant value.
        date_cols: Names of the columns to fill.
        num_rows: Number of values to generate per column.

    Returns:
        The same ``df_new`` object that was passed in.
    """
    for column in date_cols:
        source = df_original[column]

        if not pd.api.types.is_datetime64_any_dtype(source):
            # Not a real datetime column (e.g. dates stored as strings):
            # repeat the first original value unchanged.
            df_new[column] = [source.iloc[0]] * num_rows
            continue

        earliest = source.min()
        latest = source.max()
        span_days = (latest - earliest).days

        generated = []
        for _ in range(num_rows):
            offset = timedelta(days=random.randint(0, span_days))
            generated.append(earliest + offset)
        df_new[column] = generated
    return df_new

# Function to generate random email IDs based on a base email string
def generate_random_emails(df_new, email_col, base_email, num_rows):
    """Fill ``email_col`` of ``df_new`` with numbered variants of ``base_email``.

    ``base_email`` is split on ``'@'`` into a user name and a domain; row
    ``i`` (0-based) receives ``<user name><i+1>@<domain>``. The unpacking
    raises ``ValueError`` unless ``base_email`` contains exactly one ``'@'``.

    Args:
        df_new: DataFrame to fill; it is modified in place.
        email_col: Name of the column to write.
        base_email: Template address that is split into user name and domain.
        num_rows: Number of addresses to generate.

    Returns:
        The same ``df_new`` object that was passed in.
    """
    pieces = base_email.split('@')
    email_username, email_domain = pieces

    # The numeric suffix starts at 1 and makes every address distinct.
    addresses = []
    for i in range(num_rows):
        addresses.append(f"{email_username}{i+1}@{email_domain}")

    df_new[email_col] = addresses
    return df_new

# Function to detect email columns based on the content
def detect_email_columns(df):
    """Return the names of columns in ``df`` that appear to hold e-mail addresses.

    Every value is converted to a string and tested against a simple
    ``something@something.something`` pattern, matched from the start of the
    string. A column qualifies as soon as at least one of its values matches.

    Args:
        df: DataFrame whose columns are inspected.

    Returns:
        A list of matching column names, in the order they appear in ``df``.
    """
    email_pattern = re.compile(r"[^@]+@[^@]+\.[^@]+")

    def looks_like_email(text):
        return bool(email_pattern.match(text))

    return [
        name
        for name in df.columns
        if df[name].astype(str).apply(looks_like_email).any()
    ]

# Main function to read the input file, generate new test data, and save/print it
def generate_test_data(input_file, num_rows, output_folder, default_values=None, base_email="[email protected]"):
    """Generate synthetic test data for every sheet of an Excel workbook.

    For each sheet, a new DataFrame with ``num_rows`` rows and the sheet's
    columns is built: numeric columns get random integers, datetime columns
    get random dates, columns detected as e-mail columns get numbered
    addresses, and any column named in ``default_values`` is then overwritten
    with its constant. The result is printed as CSV and written to
    ``<output_folder>/<sheet_name>_Generated_Test_Data.csv``.

    Args:
        input_file: Path of the Excel workbook to read. All sheets are read
            and the first row of each sheet is skipped.
        num_rows: Number of rows to generate per sheet.
        output_folder: Directory for the CSV files; created if missing.
        default_values: Optional mapping of column name to a constant that
            fills that column. Names not present in a sheet are ignored.
            ``None`` (the default) means no constants.
        base_email: Template address used for e-mail columns.
            NOTE(review): the default looks like a redacted placeholder
            rather than a real address -- confirm the intended value.

    Returns:
        None. The output is the printed CSV text and the files on disk.
    """
    # Use a None sentinel rather than a ``{}`` default: a mutable default is a
    # single dict object shared by every call of the function.
    if default_values is None:
        default_values = {}

    # sheet_name=None loads every sheet into a {sheet name: DataFrame} dict.
    excel_data = pd.read_excel(input_file, sheet_name=None, skiprows=1)

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    for sheet_name, df_original in excel_data.items():
        print(f"\nProcessing sheet: {sheet_name}")

        # Start from an all-NaN frame with the same columns as the sheet.
        df_new = pd.DataFrame(index=range(num_rows), columns=df_original.columns)

        # Classify the sheet's columns by dtype / content.
        num_cols = df_original.select_dtypes(include=[np.number]).columns.tolist()
        date_cols = df_original.select_dtypes(include=['datetime']).columns.tolist()
        email_cols = detect_email_columns(df_original)

        # The helpers fill the frame they are given and return it, so no
        # intermediate copies are needed.
        df_new = generate_unique_random_numbers(df_new, df_original, num_cols, num_rows)
        df_new = generate_random_dates(df_new, df_original, date_cols, num_rows)
        for email_col in email_cols:
            df_new = generate_random_emails(df_new, email_col, base_email, num_rows)

        # Constants are applied last so they override any generated values.
        for col, default_value in default_values.items():
            if col in df_new.columns:
                df_new[col] = [default_value] * num_rows

        # Print the generated data to the console for verification
        print(f"Generated test data for sheet '{sheet_name}':")
        print(df_new.head(num_rows).to_csv(index=False))

        # Save the generated data to a CSV file in the output folder
        output_file = os.path.join(output_folder, f"{sheet_name}_Generated_Test_Data.csv")
        df_new.to_csv(output_file, index=False)
        print(f"Generated test data saved to {output_file}")

# Example usage (runs immediately when this file is executed or imported):
# Constant values applied to every generated row of the named columns.
default_values = {
    'Status': 'Active',  # Example: All rows will have 'Active' in the 'Status' column
    'Country': 'USA'  # Likewise, every row gets 'USA' in the 'Country' column
}

# Input Excel file (raw string, so the backslash is taken literally)
input_file = r"C:\Test_data.xlsx"

# Output folder for generated CSV files
output_folder = r"C:\Generated_Test_Data"

    

# Specify the number of rows to generate (e.g., 10 rows)
num_rows = 10

# Generate, print, and save the test data for each sheet
generate_test_data(input_file, num_rows, output_folder, default_values=default_values, base_email="[email protected]")

1 Comment

How is this related to the question asked by the OP?

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.