/home/mjc1/public_html/lib/html_dom/example/scraping/example_scraping_imdb.php


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
<?php
include_once('../../simple_html_dom.php');

/**
 * Scrape the title, rating and overview fields from an IMDB title page.
 *
 * The page is loaded with file_get_html() from simple_html_dom. Every
 * div[class="info"] block is turned into one entry of the result: the text
 * of its <h5> heading becomes the key, and the concatenated text of its
 * links (except the "more" link) and text nodes becomes the value.
 * Scanning stops at the block whose heading is "User Comments:".
 *
 * @param string $url Address of the page to scrape.
 *
 * @return array Map of field name => scraped text. 'Title' and 'Rating' are
 *               always present (empty string when the element is missing).
 *               An empty array is returned when the page cannot be loaded.
 */
function scraping_IMDB($url) {
    $ret = array();

    // create HTML DOM
    $html = file_get_html($url);
    if (!$html) {
        // Nothing was loaded, so there is nothing to scrape.
        return $ret;
    }

    // get title (find() with an index yields no node when nothing matches)
    $title = $html->find('title', 0);
    $ret['Title'] = $title ? $title->innertext : '';

    // get rating
    $rating = $html->find('div[class="general rating"] b', 0);
    $ret['Rating'] = $rating ? $rating->innertext : '';

    // get overview
    foreach ($html->find('div[class="info"]') as $div) {
        // Stop at the user comments. Use break rather than an early return
        // so the DOM is always released by the clean-up code below.
        $heading = $div->find('h5', 0);
        if ($heading && $heading->innertext === 'User Comments:') {
            break;
        }

        $key = '';
        $val = '';

        foreach ($div->find('*') as $node) {
            if ($node->tag === 'h5') {
                $key = $node->plaintext;
            }

            // Collect link text (skipping the "more" link) and plain text nodes.
            $isLink = $node->tag === 'a' && $node->plaintext !== 'more';
            if ($isLink || $node->tag === 'text') {
                $val .= trim(str_replace("\n", '', $node->plaintext));
            }
        }

        $ret[$key] = $val;
    }

    // clean up memory
    $html->clear();
    unset($html);

    return $ret;
}


// -----------------------------------------------------------------------------
// test it!
$ret = scraping_IMDB('http://imdb.com/title/tt0335266/');

foreach ($ret as $k => $v) {
    // The scraped text comes from a remote page, so escape it before echoing
    // it into HTML. double_encode is off so entities already present in the
    // scraped text are not encoded a second time.
    echo '<strong>' . htmlspecialchars((string) $k, ENT_QUOTES, 'UTF-8', false) . ' </strong>'
        . htmlspecialchars((string) $v, ENT_QUOTES, 'UTF-8', false) . '<br>';
}
?>