| 1 |
/* $Id$ */ |
|---|
| 2 |
|
|---|
| 3 |
/* |
|---|
| 4 |
* Copyright (c) 2003-2007 Axel Andersson |
|---|
| 5 |
* All rights reserved. |
|---|
| 6 |
* |
|---|
| 7 |
* Redistribution and use in source and binary forms, with or without |
|---|
| 8 |
* modification, are permitted provided that the following conditions |
|---|
| 9 |
* are met: |
|---|
| 10 |
* 1. Redistributions of source code must retain the above copyright |
|---|
| 11 |
* notice, this list of conditions and the following disclaimer. |
|---|
| 12 |
* 2. Redistributions in binary form must reproduce the above copyright |
|---|
| 13 |
* notice, this list of conditions and the following disclaimer in the |
|---|
| 14 |
* documentation and/or other materials provided with the distribution. |
|---|
| 15 |
* |
|---|
| 16 |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
|---|
| 17 |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|---|
| 18 |
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|---|
| 19 |
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, |
|---|
| 20 |
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
|---|
| 21 |
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
|---|
| 22 |
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|---|
| 23 |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
|---|
| 24 |
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
|---|
| 25 |
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|---|
| 26 |
* POSSIBILITY OF SUCH DAMAGE. |
|---|
| 27 |
*/ |
|---|
| 28 |
|
|---|
| 29 |
#import "NSImage-FHAdditions.h" |
|---|
| 30 |
#import "FHHTMLParser.h" |
|---|
| 31 |
#import "FHSettings.h" |
|---|
| 32 |
|
|---|
| 33 |
@implementation FHHTMLParser

/*
 * Convenience wrapper around +imageLinksInHTML:baseURL:type: that takes the
 * link type from the FHHTMLImageType setting.
 */
+ (NSArray *)imageLinksInHTML:(NSString *)html baseURL:(WIURL *)baseURL {
	return [self imageLinksInHTML:html baseURL:baseURL type:[FHSettings intForKey:FHHTMLImageType]];
}



/*
 * Collects the image URLs referenced by an HTML document.
 *
 * The scan is purely textual: every case-insensitive occurrence of the
 * attribute name ("SRC", "HREF" or both, depending on type) is located and
 * the run of characters that follows it, up to the next whitespace, quote,
 * "=", "<" or ">", is taken as a candidate link.
 *
 * html     The HTML text to scan. An empty or nil string yields an empty
 *          array.
 * baseURL  URL that relative links are resolved against. Links starting
 *          with "/" replace its path; other relative links are appended to
 *          its directory.
 * type     FHHTMLImageOnlyInline (SRC only), FHHTMLImageOnlyLinks (HREF
 *          only) or FHHTMLImageBothInlineAndLinks; any other value is
 *          treated as the latter.
 *
 * Returns an array of WIURL objects without duplicates. When both kinds of
 * links are requested, all HREF matches precede all SRC matches. Only links
 * whose path extension is listed in +[NSImage FHImageFileTypes] are kept,
 * and ".ico" links are always dropped.
 */
+ (NSArray *)imageLinksInHTML:(NSString *)html baseURL:(WIURL *)baseURL type:(int)type {
	NSScanner			*scanner;
	NSMutableArray		*links, *urls;
	NSMutableSet		*set;
	NSCharacterSet		*skipSet;
	NSArray				*tokens;
	NSSet				*types;
	NSString			*token, *link, *path, *extension;
	WIURL				*url;
	NSUInteger			i, count;

	/* Nothing to scan; also avoids handing a nil string to NSScanner. */
	if([html length] == 0)
		return [NSMutableArray array];

	links		= [NSMutableArray arrayWithCapacity:50];
	skipSet		= [NSCharacterSet characterSetWithCharactersInString:@" =\r\n\t\"\'<>"];

	switch(type) {
		case FHHTMLImageOnlyInline:
			tokens = [NSArray arrayWithObject:@"SRC"];
			break;

		case FHHTMLImageOnlyLinks:
			tokens = [NSArray arrayWithObject:@"HREF"];
			break;

		case FHHTMLImageBothInlineAndLinks:
		default:
			tokens = [NSArray arrayWithObjects:@"HREF", @"SRC", nil];
			break;
	}

	count = [tokens count];

	/* One full pass over the document per attribute name. */
	for(i = 0; i < count; i++) {
		token = [tokens objectAtIndex:i];

		scanner = [[NSScanner alloc] initWithString:html];
		[scanner setCaseSensitive:NO];
		[scanner setCharactersToBeSkipped:skipSet];

		while(![scanner isAtEnd]) {
			/*
			 * The result is deliberately ignored: -scanUpToString:intoString:
			 * returns NO when the token is located right at the scan position,
			 * in which case the token must still be consumed below. Making the
			 * token scan depend on this result can leave the scan location
			 * stuck and the loop spinning forever.
			 */
			[scanner scanUpToString:token intoString:NULL];

			/* No further occurrence of the token: this pass is finished. */
			if(![scanner scanString:token intoString:NULL])
				break;

			/* Leading "=" and quotes are in skipSet and are skipped here. */
			if([scanner scanUpToCharactersFromSet:skipSet intoString:&link])
				[links addObject:link];
		}

		[scanner release];
	}

	count	= [links count];
	urls	= [NSMutableArray arrayWithCapacity:count];
	set		= [NSMutableSet setWithCapacity:count];
	types	= [NSSet setWithArray:[NSImage FHImageFileTypes]];

	for(i = 0; i < count; i++) {
		link		= [links objectAtIndex:i];
		extension	= [link pathExtension];

		/*
		 * NOTE(review): this membership test is case-sensitive, so whether
		 * upper-case extensions are accepted depends on what
		 * +FHImageFileTypes returns -- confirm.
		 */
		if(![types containsObject:extension])
			continue;

		if([extension isEqualToString:@"ico"])
			continue;

		link = [link stringByReplacingURLPercentEscapes];

		if([link containsSubstring:@"://"]) {
			/* Absolute link carrying its own scheme. */
			url = [WIURL URLWithString:link];
		} else {
			/* Relative link: resolve against a private copy of the base URL. */
			url = [[baseURL copy] autorelease];

			if([link hasPrefix:@"/"]) {
				[url setPath:link];
			} else {
				path = [url path];

				/* A base path not ending in "/" names a file; use its directory. */
				if(![path hasSuffix:@"/"])
					path = [path stringByDeletingLastPathComponent];

				[url setPath:[path stringByAppendingPathComponent:link]];
			}
		}

		/*
		 * -addObject: raises on nil, so skip links for which no URL object
		 * could be obtained (URLWithString: returned nil, or baseURL was nil).
		 */
		if(!url)
			continue;

		/* The set gives cheap duplicate detection; the array keeps the order. */
		if(![set containsObject:url]) {
			[urls addObject:url];
			[set addObject:url];
		}
	}

	return urls;
}

@end
|---|