5
\$\begingroup\$

ApplicationDetail.txt:

URL: https://www.abc.com
applicationNo : 123456

Application Class:

from dataclasses import dataclass


@dataclass
class Application(object):
""" Binding Json Data to this Class """
    Status: str
    ApplicationType: str
    StatusDate: str
    Location: str
    LocationDate: str
    ConfirmationNumber: int
    FirstNamedApplicant: str
    EntityStatus: str

Code:

from selenium import webdriver
import json
from selenium.webdriver.common.by import By
import logging
import traceback
from Application import Application  # importing Application Class


def PrintObj(applicationObject):
"""This Function Will Print Object Binded to Application Class"""
    try:
        print('     ***************** After Deserialize *****************')
        print('Status: %s' % applicationObject.Status)
        print('ApplicationType: %s' % applicationObject.ApplicationType)
        print('StatusDate: %s' % applicationObject.StatusDate)
        print('Location: %s' % applicationObject.Location)
        print('LocationDate: %s' % applicationObject.LocationDate)
        print('ConfirmationNumber:', applicationObject.ConfirmationNumber)
        print('FirstNamedApplicant: %s' % applicationObject.FirstNamedApplicant)
        print('EntityStatus: %s' % applicationObject.EntityStatus)
        print('---------------------------------------------------')
    except AttributeError as ex:
        print(ex)
        logging.error(ex, exc_info=True)
    except:
        print('ERROR Occurred in PrintObj Method')
        logging.error('ERROR Occurred in PrintObj Method', exc_info=True)


def ApplicationDetail(path, mode):
    """This Function Will Fetch ApplicationDetail from .txt File in given path"""
    try:
        logging.info('Opening ApplicationDetail File')
        with open(path, mode) as file:  # Opening ApplicationDetail File
            webSiteURL = file.readline().replace('URL : ', '').replace('\n', '')
            application_no = file.readline().replace('applicationNo : ', '').replace('\n', '')
            file.close()  # closed ApplicationDetail File
        logging.info('closed ApplicationDetail File')
        return webSiteURL, application_no
    except FileNotFoundError as ex:
        print(ex)
        logging.critical(ex, exc_info=True)
    except:
        print('Something ERROR Occurred in ApplicationDetail Method')
        logging.critical('Something ERROR Occurred in ApplicationDetail Method', exc_info=True)


def JsonStringSerialize(recordDictionary):
    """This Function Will Convert Formal Parameter(recordDictionary) to json_string and Write in .json File"""
    try:
        jsonString = json.dumps(recordDictionary, indent=4)  # serilazing recordDictionary
        logging.info('Serialzing Done')
        with open('ApplicationData.json', 'w') as f_Out:
            logging.info('Writing in .json File')
            f_Out.write(jsonString)  # writing in .json file 
            logging.info('Writing completed in .json File')
            f_Out.close()  # closing .json File
    except json.encoder.JSONEncoder:
        print('Cannot Serializable')
        logging.error('Cannot Serializable', exc_info=True)
    except:
        print('Serialization Failed')
        logging.error('Serialization Failed', exc_info=True)


def DeserializeJson():
"""This Function Will Fetch JSON from .json File, Deserialize and Bind to Application Class"""
    try:
        with open('ApplicationData.json', 'r') as f_Out:
            jsonString = json.load(f_Out)  # Deserializing Json Data
        logging.info('Deserialzing Done')
        return Application(**jsonString)  #  Binding to Application Class
    except json.decoder.JSONDecodeError:
        print('Cannot Deserializable')
        logging.error('Cannot Deserializable', exc_info=True)
    except:
        print('Deserialization Failed')
        logging.error('Deserialization Failed', exc_info=True)


def ScrapData(URL, applicationNo, xpath, elementID, xpath2):
    """This Function will scrap data from given URL"""
    try:
        webBrowser = webdriver.Ie(r'C:\Users\XYZ\WebDriver\IEDriverServer.exe')
        webBrowser.get(URL)
        webBrowser.implicitly_wait(15)
        webBrowser.find_element_by_xpath(xpath).send_keys(applicationNo)  # webBrowser.find_elements_by_class_name('saeRow').text
        webBrowser.find_element_by_id(elementID).click()
        logging.info('Scraping Started')
        applicationData = [td.text for td in webBrowser.find_elements_by_xpath(xpath2)]
        logging.info('Scraping Completed')
        return {'Status': applicationData[0], 'ApplicationType': applicationData[1],
                'StatusDate': applicationData[2], 'Location': applicationData[3],
                 'LocationDate': applicationData[4],
                'ConfirmationNumber': applicationData[5],
                'FirstNamedApplicant': applicationData[6],
                'EntityStatus': applicationData[7]}
    except:
        print('Something Error Occurred in ScrapData Method')
        logging.critical('Something Error Occurred in ScrapData Method', exc_info=True)
    finally:
        webBrowser.__exit__()


if __name__ == '__main__':
    try:
        logging.basicConfig(format='%(levelname)s - %(asctime)s - %(message)s', datefmt='%Y-%m-%d %I:%M:%S %p',
                        filename='Log_File.log', level=logging.DEBUG)
        logging.info('Task Started')
        webSiteURL, applicationNo = ApplicationDetail('ApplicationDetails.txt', 'r')
        JsonStringSerialize(ScrapData(webSiteURL, applicationNo, '//input[@id="id"]', "Submit", '//td'))
        if input('      U Want to Deserialize y || n\n') == 'y':
            PrintObj(DeserializeJson())
    except TypeError as ex:
        print(ex)
        logging.critical(ex, exc_info=True)
    except NameError as ex:
        print(ex)
        logging.critical(ex, exc_info=True)
    except ModuleNotFoundError as ex:
        print(ex)
        #traceback.print_exc()
        logging.critical(ex, exc_info=True)
    except:
        print('Something Error Occurred in Main Method')
        logging.critical('Something Error Occurred in Main Method')
    finally:
        logging.info('Task Completed')

The above project will scrape data from the website using selenium in Python. Any suggestion/review of the above code will be helpful.

\$\endgroup\$

1 Answer 1

1
\$\begingroup\$

Portability

When I run the code, I get a syntax error on lines like this:

def PrintObj(applicationObject):
"""This Function Will Print Object Binded to Application Class"""

The problem is that the docstring is not indented properly. This fixes the error for me:

def PrintObj(applicationObject):
    """This Function Will Print Object Binded to Application Class"""

Perhaps your version of Python is more forgiving.

Unused code

ruff identifies these import lines as not needed:

from selenium.webdriver.common.by import By

import traceback

They should be removed.

Comments

This comment is not needed because it merely re-states what the code already makes clear:

from Application import Application  # importing Application Class

The same is true for this comment:

logging.info('Opening ApplicationDetail File')
with open(path, mode) as file:  # Opening ApplicationDetail File

Remove commented-out code to reduce clutter:

webBrowser.find_element_by_xpath(xpath).send_keys(applicationNo)  # webBrowser.find_elements_by_class_name('saeRow').text

Naming

The word "scrap" is often misused in this context. The word "scrape" should be used. For example, change ScrapData to ScrapeData. Note that "Scraping" is correct.

The PEP 8 style guide recommends snake_case for function and variable names. For example, ScrapData would be scrape_data. applicationNo would be application_no, etc.

DRY

The same code is used in all these except branches:

except TypeError as ex:
    print(ex)
    logging.critical(ex, exc_info=True)
except NameError as ex:
    print(ex)
    logging.critical(ex, exc_info=True)
except ModuleNotFoundError as ex:
    print(ex)
    logging.critical(ex, exc_info=True)

Multiple errors can be combined in a tuple to eliminate the repetition:

except (TypeError, NameError, ModuleNotFoundError) as ex:
    print(ex)
    logging.critical(ex, exc_info=True)
\$\endgroup\$

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.