7

I have to parse an HTML page for my iOS app. I read on the web that I should use XPath, and I found the TFHpple library. I see that this library uses XPath queries.

<!DOCTYPE "html">
<html>   
    <head>
                <meta property="og:site_name" content="Sito 4"/>
        <meta property="og:title" content="home"/>
        <meta name="viewport" content="width=320" />
        <meta name="keywords" content="mobile website,microsite, mobdis,iphone,android" />
        <meta name="description" content="Amazingly designed using MobDis.com" />
        <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
        <meta name="format-detection" content="telephone=no" />
        <title>Sito 4/home</title>

        <!--[if (!IE)|(gte IE 8)]><!-->
<link href="http://cdn2.mobdis.me/assets/publish-3.4-datauri.css" media="screen" rel="stylesheet" type="text/css" />
<!--<![endif]-->
<!--[if lte IE 7]>
<link href="http://cdn2.mobdis.me/assets/publish-3.4.css" media="screen" rel="stylesheet" type="text/css" />
<![endif]-->


        <script type="text/javascript"></script>
        <style type="text/css">
            body{
                background-color: black;
            }
        </style>

    </head>
    <body>


        <div id='processingScreen' style="width:320px;height:417px;background-color:rgb(0, 0, 128)"><img alt="Publish_loading" src="http://cdn2.mobdis.me/pro_images/publish_loading.gif" style="max-width:100%;max-height:100%;" />
        </div>

        <div data-role="page" id="home" data-id="94568">


            <div data-role="content" id="area" class="canvas" style="width:320px;height:417px;background-color:rgb(0, 0, 128)">





            <div id="text-1" data-name="text-1" class="drsElement drsMoveHandle unselectable parentDef" data-content="text" style="width: 320px; height: 50px; top: 0px; left: 0px; z-index: 1; overflow: visible; border-top-left-radius: 5px; border-top-right-radius: 5px; border-bottom-right-radius: 5px; border-bottom-left-radius: 5px; -webkit-box-shadow: rgb(255, 255, 255) 0px 2px 0px inset; "><div style="width: 320px; height: 50px; background-color: transparent; border-color: rgb(112, 150, 171); border-width: 2px; border-style: none none solid; border-top-left-radius: 5px; border-top-right-radius: 5px; border-bottom-right-radius: 5px; border-bottom-left-radius: 5px; background-image: -webkit-gradient(linear, 0% 0%, 0% 100%, from(rgb(181, 221, 242)), to(rgb(181, 221, 242))); " class="txtDef childDef editable" data-pageid="" data-pageurl="" data-exlink="" data-transition="0" id="text-1_content" title="text-1" data-allborderradius="unchecked"><div>
    &nbsp;</div>
<div>
    <span style="color:#ffffff;"><span style="font-family: futura; "><span style="font-size: 26px; "><span style="text-shadow: rgba(0, 0, 0, 0.59375) 1px 1px 2px; "><strong>Sito 4</strong></span></span></span></span></div>
</div></div><div id="image-1" data-name="image-1" class="drsElement drsMoveHandle unselectable parentDef" data-content="image" style="width: 320px; height: 169px; top: 48px; left: 0px; z-index: 2; " data-src="assets/promozioni.png"><img src="http://cdn2.mobdis.me/uploads/73005/original.png?1375193775" style="width: 320px; height: 169px; " data-pageid="" data-pageurl="" data-exlink="" data-transition="0" id="image-1_content" title="image-1" class="childDef"></div><div id="text-2" data-name="text-2" class="drsElement drsMoveHandle unselectable parentDef" data-content="text" style="width: 320px; height: 100px; top: 277px; left: 0px; z-index: 3; overflow: visible; "><div style="width: 320px; height: 100px; " class="txtDef childDef editable" data-pageid="" data-pageurl="" data-exlink="" data-transition="0" id="text-2_content" title="text-2"><div>
    <span style="color:#ffffff;"><span style="font-size: 20px; "><strong>Complimenti hai ottenuto una fantastica promozione!</strong></span></span></div>
</div></div>
            <div id="text-3" data-name="text-3" class="drsElement drsMoveHandle unselectable parentDef" data-content="text" style="width: 320px; height: 65px; top: 332px; left: 0px; z-index: 4; overflow: visible; "><div style="width: 320px; height: 65px; " class="txtDef childDef editable" data-pageid="" data-pageurl="" data-exlink="" data-transition="0" id="text-3_content" title="text-3"><div style="text-align: left; ">
    <span style="font-size:12px;"><span style="color: rgb(255, 255, 255); ">Scadenza: 29 Ago 2013</span></span></div>
</div></div>
            </div>


        </div>








            <div id="divAds"><a href="http://www.mobdis.com" target="_blank"><img alt="Banner" src="http://cdn2.mobdis.me/pro_images/banner.png" style="display:inline;max-width:100%;" /></a></div>





                <div id="divBrowserAlert">The current version of your browser is not supported yet. To ensure full functionality, we recommend using Apple Safari or Google Chrome.<br/>
                    <button onclick="dismissUnsupportedBrowser('')">Dismiss</button>

                    <!--<button onclick=window.location = "";">Redirect</button>-->

                </div>

                <script type="text/javascript">

                </script>




        <script type="text/javascript">
            var mpq = [];
            mpq.push(["init", '5fed56e1e4a85990c0fb6ff5294d2d42']);
            (function(){var b,a,e,d,c;b=document.createElement("script");b.type="text/javascript";b.async=true;b.src=(document.location.protocol==="https:"?"https:":"http:")+"//api.mixpanel.com/site_media/js/api/mixpanel.js";a=document.getElementsByTagName("script")[0];a.parentNode.insertBefore(b,a);e=function(f){return function(){mpq.push([f].concat(Array.prototype.slice.call(arguments,0)))}};d=["init","track","track_links","track_forms","register","register_once","identify","name_tag","set_config"];for(c=0;c<d.length;c++){mpq[d[c]]=e(d[c])}})();
            mpq.track("project_id-13003");
            mpq.name_tag('mobdis');
        </script>

                <div id="scriptImports">
            <script src="http://cdn2.mobdis.me/assets/jq-3.4.js" type="text/javascript"></script>

            <script src="http://cdn2.mobdis.me/assets/publish-3.4.js" type="text/javascript"></script>
            <script src="http://cdn2.mobdis.me/assets/jqm-3.4.js" type="text/javascript"></script>






<!--   -->

            <script type="text/javascript">
                var root = "http://sito4.mobdis.co/";
                var cdn_link = "http://cdn2.mobdis.me/";

                var require_passcode = false;

                var showWaterMark = false;
                var showNone = false;


            </script>

        </div>

        <div id="divAutho" class="autho_56930"></div>


        <div id="restrictMode" class="divBasic"></div>
        <noscript style="color:red;font-size:1em;top:0px;left:0px;position:absolute;display:block;background: #363636;color:#fbb041;height:100%;text-align:center;">Javascript is disabled in your browser.<br /> Please enable the javascript.</noscript>
    </body>
</html>

I'm interested in two pieces of information from this HTML:

  • this <title>Sito 4/home</title>

  • this <span style="color: rgb(255, 255, 255); ">Scadenza: 29 Ago 2013</span>

For now I wrote this code:

/**
 * Downloads the page at `stringUrl`, reads the text of its <title> element
 * and stores a shortened copy of it in `htmlTitle`.
 *
 * The stored value is the first six characters of the title (fewer when the
 * title is shorter) with every space removed, e.g. "Sito 4/home" -> "Sito4".
 * If the page cannot be downloaded or has no <title>, `htmlTitle` is left
 * untouched.
 *
 * To extract further values from the same page, run additional
 * -searchWithXPathQuery: calls on the same parser instance.
 */
- (void)loadDataFromHtml {
    NSURL *url = [NSURL URLWithString:stringUrl];

    // NOTE(review): +dataWithContentsOfURL: is synchronous and blocks the
    // calling thread for the duration of the download; call this method off
    // the main thread, or move the fetch to NSURLSession.
    NSData *data = [NSData dataWithContentsOfURL:url];
    if (data.length == 0) {
        // Download failed (bad URL, no network, empty body): nothing to parse.
        return;
    }

    TFHpple *parser = [TFHpple hppleWithHTMLData:data];
    NSArray *titleNodes = [parser searchWithXPathQuery:@"//title"];

    const NSUInteger maxPrefixLength = 6;
    for (TFHppleElement *element in titleNodes) {
        NSString *rawTitle = [[element firstChild] content];

        // Clamp the prefix length: -substringToIndex: raises NSRangeException
        // when the index is past the end of the string.
        NSUInteger prefixLength = MIN(maxPrefixLength, rawTitle.length);
        NSString *prefix = [rawTitle substringToIndex:prefixLength];
        htmlTitle = [prefix stringByReplacingOccurrencesOfString:@" " withString:@""];
    }
}

(Based on this tutorial: http://www.raywenderlich.com/14172/how-to-parse-html-on-ios)

Now my question is: how can I find the second piece of information I need? Should I create another NSString with a second XPath query to obtain it? Can you help me solve this problem? Thank you.

2
  • Yes, the easiest way is to make another XpathQueryStringTitle with the xpath to the element you want. You can use the same parser and the same html data of course ;) Commented Jul 31, 2013 at 9:24
  • How to get Image URL by using above code? I got title. But I also need Image URL. Commented Mar 30, 2016 at 6:10

3 Answers 3

7

I solved my issue by using Firebug. I opened my page in Firefox, opened Firebug, and selected the row containing the information I needed. Then I right-clicked on it, chose "Copy XPath", and pasted the result into my app, and it works!

Thank you for your suggestion!

Sign up to request clarification or add additional context in comments.

1 Comment

Sweet and Simple !
4

Check with this:

https://github.com/mwaterfall/MWFeedParser

https://github.com/zootreeves/Objective-C-HMTL-Parser

These libraries provide HTML parsers for the iPhone SDK.

More help on:

5 Comments

@PaoloRobertetti No, IMHO it is great (because very easy to use) class ;)
@HAS: Thank you. Can you suggest how to build an XPath query to find this <span style="color: rgb(255, 255, 255); ">Scadenza: 29 Ago 2013</span> in my HTML?
Are the attributes always exactly like this: style="color: rgb(255, 255, 255); "?
Yes. You can look at my question, which contains the complete HTML that I have to parse. It is exactly this: pastebin.com/Wv2DkNWb
According to the HTML you posted in the question, try this one: /html/body/div[2]/div[1]/div[4]/div[1]/div[1]/span/span.
1

I have successfully used Nolan Waite's HTMLReader project to parse a very complicated HTML5 response easily and reliably.

It's easy to set up and use, and I highly recommend it. All the other projects I found are old, unmaintained, or intended to parse strict XHTML / XML.

Here's the example from the README:

// Three independent usage snippets for HTMLReader: parse a string and query it,
// restructure the parsed tree, and fetch + parse a live web page.
#import <HTMLReader/HTMLReader.h>

// Parse a string and find an element.
NSString *markup = @"<p><b>Ahoy there sailor!</b></p>";
HTMLDocument *document = [HTMLDocument documentWithString:markup];
// @"b" is the selector string; the first matching node's text is logged.
NSLog(@"%@", [document firstNodeMatchingSelector:@"b"].textContent);
// => Ahoy there sailor!

// Wrap one element in another.
HTMLElement *b = [document firstNodeMatchingSelector:@"b"];
NSMutableOrderedSet *children = [b.parentNode mutableChildren];
HTMLElement *wrapper = [[HTMLElement alloc] initWithTagName:@"div"
                                                 attributes:@{@"class": @"special"}];
// Insert the wrapper at b's current index among its siblings, then re-parent
// b under the wrapper.
[children insertObject:wrapper atIndex:[children indexOfObject:b]];
b.parentNode = wrapper;
NSLog(@"%@", [document.rootElement serializedFragment]);
// => <html><head></head><body><p><div class="special"> \
<b>Ahoy there sailor!</b></div></p></body></html>

// Load a web page.
NSURL *URL = [NSURL URLWithString:@"https://github.com/nolanw/HTMLReader"];
NSURLSession *session = [NSURLSession sharedSession];
// The data task is created here and started by the trailing -resume message.
// NOTE(review): the completion handler never inspects `error`, so a failed
// request is passed to the parser unchecked.
[[session dataTaskWithURL:URL completionHandler:
  ^(NSData *data, NSURLResponse *response, NSError *error) {
      // Content-Type is only read when the response is an HTTP response;
      // otherwise nil is passed on. Presumably HTMLReader uses it to choose
      // the text encoding - confirm against the HTMLReader documentation.
      NSString *contentType = nil;
      if ([response isKindOfClass:[NSHTTPURLResponse class]]) {
          NSDictionary *headers = [(NSHTTPURLResponse *)response allHeaderFields];
          contentType = headers[@"Content-Type"];
      }
      HTMLDocument *home = [HTMLDocument documentWithData:data
                                        contentTypeHeader:contentType];
      HTMLElement *div = [home firstNodeMatchingSelector:@".repository-description"];
      // Trim surrounding whitespace and newlines from the text before logging.
      NSCharacterSet *whitespace = [NSCharacterSet whitespaceAndNewlineCharacterSet];
      NSLog(@"%@", [div.textContent stringByTrimmingCharactersInSet:whitespace]);
      // => A WHATWG-compliant HTML parser in Objective-C.
  }] resume];

2 Comments

Hi Dhiraj, how would you get the 'og:title' using HTMLReader? I'm trying to use the selector: @"meta[property=og:title]" with no success...
Thank you @Dhiraj for referring this awesome HTMLReader project. Even in 2021 it's still well-maintained.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.