· 7 years ago · Jan 01, 2019, 07:54 AM
1.\" Automatically generated by Pod::Man 4.07 (Pod::Simple 3.32)
2.\"
3.\" Standard preamble:
4.\" ========================================================================
5.de Sp \" Vertical space (when we can't use .PP)
6.if t .sp .5v
7.if n .sp
8..
9.de Vb \" Begin verbatim text
10.ft CW
11.nf
12.ne \\$1
13..
14.de Ve \" End verbatim text
15.ft R
16.fi
17..
18.\" Set up some character translations and predefined strings. \*(-- will
19.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
20.\" double quote, and \*(R" will give a right double quote. \*(C+ will
21.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
22.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
23.\" nothing in troff, for use with C<>.
24.tr \(*W-
25.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
26.ie n \{\
27. ds -- \(*W-
28. ds PI pi
29. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
30. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
31. ds L" ""
32. ds R" ""
33. ds C` ""
34. ds C' ""
35'br\}
36.el\{\
37. ds -- \|\(em\|
38. ds PI \(*p
39. ds L" ``
40. ds R" ''
41. ds C`
42. ds C'
43'br\}
44.\"
45.\" Escape single quotes in literal strings from groff's Unicode transform.
46.ie \n(.g .ds Aq \(aq
47.el .ds Aq '
48.\"
49.\" If the F register is >0, we'll generate index entries on stderr for
50.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
51.\" entries marked with X<> in POD. Of course, you'll have to process the
52.\" output yourself in some meaningful fashion.
53.\"
54.\" Avoid warning from groff about undefined register 'F'.
55.de IX
56..
57.if !\nF .nr F 0
58.if \nF>0 \{\
59. de IX
60. tm Index:\\$1\t\\n%\t"\\$2"
61..
62. if !\nF==2 \{\
63. nr % 0
64. nr F 2
65. \}
66.\}
67.\"
68.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69.\" Fear. Run. Save yourself. No user-serviceable parts.
70. \" fudge factors for nroff and troff
71.if n \{\
72. ds #H 0
73. ds #V .8m
74. ds #F .3m
75. ds #[ \f1
76. ds #] \fP
77.\}
78.if t \{\
79. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
80. ds #V .6m
81. ds #F 0
82. ds #[ \&
83. ds #] \&
84.\}
85. \" simple accents for nroff and troff
86.if n \{\
87. ds ' \&
88. ds ` \&
89. ds ^ \&
90. ds , \&
91. ds ~ ~
92. ds /
93.\}
94.if t \{\
95. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
101.\}
102. \" troff and (daisy-wheel) nroff accents
103.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110.ds ae a\h'-(\w'a'u*4/10)'e
111.ds Ae A\h'-(\w'A'u*4/10)'E
112. \" corrections for vroff
113.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115. \" for low resolution devices (crt and lpr)
116.if \n(.H>23 .if \n(.V>19 \
117\{\
118. ds : e
119. ds 8 ss
120. ds o a
121. ds d- d\h'-1'\(ga
122. ds D- D\h'-1'\(hy
123. ds th \o'bp'
124. ds Th \o'LP'
125. ds ae ae
126. ds Ae AE
127.\}
128.rm #[ #] #H #V #F C
129.\" ========================================================================
130.\"
131.IX Title "WGET 1"
132.TH WGET 1 "2019-01-01" "GNU Wget 1.7.1" "GNU Wget"
133.\" For nroff, turn off justification. Always turn off hyphenation; it makes
134.\" way too many mistakes in technical documents.
135.if n .ad l
136.nh
137.SH "NAME"
138wget \- GNU Wget Manual
139.SH "SYNOPSIS"
140.IX Header "SYNOPSIS"
141wget [\fIoption\fR]... [\fI\s-1URL\s0\fR]...
142.SH "DESCRIPTION"
143.IX Header "DESCRIPTION"
144\&\s-1GNU\s0 Wget is a freely available network utility to retrieve files from
145the World Wide Web, using \s-1HTTP \s0(Hyper Text Transfer Protocol) and
146\&\s-1FTP \s0(File Transfer Protocol), the two most widely used Internet
147protocols. It has many useful features to make downloading easier, some
148of them being:
149.IP "\(bu" 4
150Wget is non-interactive, meaning that it can work in the background,
151while the user is not logged on. This allows you to start a retrieval
152and disconnect from the system, letting Wget finish the work. By
153contrast, most Web browsers require the user's constant presence,
154which can be a great hindrance when transferring a lot of data.
155.IP "\(bu" 4
156Wget is capable of descending recursively through the structure of
157\&\s-1HTML\s0 documents and \s-1FTP\s0 directory trees, making a local copy of
158the directory hierarchy similar to the one on the remote server. This
159feature can be used to mirror archives and home pages, or traverse the
160web in search of data, like a \s-1WWW\s0 robot. In that
161spirit, Wget understands the \f(CW\*(C`norobots\*(C'\fR convention.
162.IP "\(bu" 4
163File name wildcard matching and recursive mirroring of directories are
164available when retrieving via \s-1FTP. \s0 Wget can read the time-stamp
165information given by both \s-1HTTP\s0 and \s-1FTP\s0 servers, and store it
166locally. Thus Wget can see if the remote file has changed since last
167retrieval, and automatically retrieve the new version if it has. This
168makes Wget suitable for mirroring of \s-1FTP\s0 sites, as well as home
169pages.
170.IP "\(bu" 4
171Wget works exceedingly well on slow or unstable connections,
172retrying the document until it is fully retrieved, or until a
173user-specified retry count is surpassed. It will try to resume the
174download from the point of interruption, using \f(CW\*(C`REST\*(C'\fR with \s-1FTP\s0
175and \f(CW\*(C`Range\*(C'\fR with \s-1HTTP\s0 servers that support them.
176.IP "\(bu" 4
177By default, Wget supports proxy servers, which can lighten the network
178load, speed up retrieval and provide access behind firewalls. However,
179if you are behind a firewall that requires that you use a socks style
180gateway, you can get the socks library and build Wget with support for
181socks. Wget also supports the passive \s-1FTP\s0 downloading as an
182option.
183.IP "\(bu" 4
184Builtin features offer mechanisms to tune which links you wish to follow.
185.IP "\(bu" 4
186The retrieval is conveniently traced with printing dots, each dot
187representing a fixed amount of data received (1KB by default). These
188representations can be customized to your preferences.
189.IP "\(bu" 4
190Most of the features are fully configurable, either through command line
191options, or via the initialization file \fI.wgetrc\fR. Wget allows you to define \fIglobal\fR startup files
192(\fI/usr/local/etc/wgetrc\fR by default) for site settings.
193.IP "\(bu" 4
194Finally, \s-1GNU\s0 Wget is free software. This means that everyone may use
195it, redistribute it and/or modify it under the terms of the \s-1GNU\s0 General
196Public License, as published by the Free Software Foundation.
197.SH "OPTIONS"
198.IX Header "OPTIONS"
199.SS "Basic Startup Options"
200.IX Subsection "Basic Startup Options"
201.IP "\fB\-V\fR" 4
202.IX Item "-V"
203.PD 0
204.IP "\fB\-\-version\fR" 4
205.IX Item "--version"
206.PD
207Display the version of Wget.
208.IP "\fB\-h\fR" 4
209.IX Item "-h"
210.PD 0
211.IP "\fB\-\-help\fR" 4
212.IX Item "--help"
213.PD
214Print a help message describing all of Wget's command-line options.
215.IP "\fB\-b\fR" 4
216.IX Item "-b"
217.PD 0
218.IP "\fB\-\-background\fR" 4
219.IX Item "--background"
220.PD
221Go to background immediately after startup. If no output file is
222specified via the \fB\-o\fR option, output is redirected to \fIwget-log\fR.
223.IP "\fB\-e\fR \fIcommand\fR" 4
224.IX Item "-e command"
225.PD 0
226.IP "\fB\-\-execute\fR \fIcommand\fR" 4
227.IX Item "--execute command"
228.PD
229Execute \fIcommand\fR as if it were a part of \fI.wgetrc\fR. A command thus invoked will be executed
230\&\fIafter\fR the commands in \fI.wgetrc\fR, thus taking precedence over
231them.
232.SS "Logging and Input File Options"
233.IX Subsection "Logging and Input File Options"
234.IP "\fB\-o\fR \fIlogfile\fR" 4
235.IX Item "-o logfile"
236.PD 0
237.IP "\fB\-\-output\-file=\fR\fIlogfile\fR" 4
238.IX Item "--output-file=logfile"
239.PD
240Log all messages to \fIlogfile\fR. The messages are normally reported
241to standard error.
242.IP "\fB\-a\fR \fIlogfile\fR" 4
243.IX Item "-a logfile"
244.PD 0
245.IP "\fB\-\-append\-output=\fR\fIlogfile\fR" 4
246.IX Item "--append-output=logfile"
247.PD
248Append to \fIlogfile\fR. This is the same as \fB\-o\fR, only it appends
249to \fIlogfile\fR instead of overwriting the old log file. If
250\&\fIlogfile\fR does not exist, a new file is created.
251.IP "\fB\-d\fR" 4
252.IX Item "-d"
253.PD 0
254.IP "\fB\-\-debug\fR" 4
255.IX Item "--debug"
256.PD
257Turn on debug output, meaning various information important to the
258developers of Wget if it does not work properly. Your system
259administrator may have chosen to compile Wget without debug support, in
260which case \fB\-d\fR will not work. Please note that compiling with
261debug support is always safe\-\-\-Wget compiled with the debug support will
262\&\fInot\fR print any debug info unless requested with \fB\-d\fR.
263.IP "\fB\-q\fR" 4
264.IX Item "-q"
265.PD 0
266.IP "\fB\-\-quiet\fR" 4
267.IX Item "--quiet"
268.PD
269Turn off Wget's output.
270.IP "\fB\-v\fR" 4
271.IX Item "-v"
272.PD 0
273.IP "\fB\-\-verbose\fR" 4
274.IX Item "--verbose"
275.PD
276Turn on verbose output, with all the available data. The default output
277is verbose.
278.IP "\fB\-nv\fR" 4
279.IX Item "-nv"
280.PD 0
281.IP "\fB\-\-non\-verbose\fR" 4
282.IX Item "--non-verbose"
283.PD
284Non-verbose output\-\-\-turn off verbose without being completely quiet
285(use \fB\-q\fR for that), which means that error messages and basic
286information still get printed.
287.IP "\fB\-i\fR \fIfile\fR" 4
288.IX Item "-i file"
289.PD 0
290.IP "\fB\-\-input\-file=\fR\fIfile\fR" 4
291.IX Item "--input-file=file"
292.PD
293Read URLs from \fIfile\fR, in which case no URLs need to be on
294the command line. If there are URLs both on the command line and
295in an input file, those on the command line will be the first ones to
296be retrieved. The \fIfile\fR need not be an \s-1HTML\s0 document (but no
297harm if it is)\-\-\-it is enough if the URLs are just listed
298sequentially.
299.Sp
300However, if you specify \fB\-\-force\-html\fR, the document will be
301regarded as \fBhtml\fR. In that case you may have problems with
302relative links, which you can solve either by adding \f(CW\*(C`<base
303href="\f(CIurl\f(CW">\*(C'\fR to the documents or by specifying
304\&\fB\-\-base=\fR\fIurl\fR on the command line.
305.IP "\fB\-F\fR" 4
306.IX Item "-F"
307.PD 0
308.IP "\fB\-\-force\-html\fR" 4
309.IX Item "--force-html"
310.PD
311When input is read from a file, force it to be treated as an \s-1HTML\s0
312file. This enables you to retrieve relative links from existing
313\&\s-1HTML\s0 files on your local disk, by adding \f(CW\*(C`<base
314href="\f(CIurl\f(CW">\*(C'\fR to \s-1HTML,\s0 or using the \fB\-\-base\fR command-line
315option.
316.IP "\fB\-B\fR \fI\s-1URL\s0\fR" 4
317.IX Item "-B URL"
318.PD 0
319.IP "\fB\-\-base=\fR\fI\s-1URL\s0\fR" 4
320.IX Item "--base=URL"
321.PD
322When used in conjunction with \fB\-F\fR, prepends \fI\s-1URL\s0\fR to relative
323links in the file specified by \fB\-i\fR.
324.SS "Download Options"
325.IX Subsection "Download Options"
326.IP "\fB\-\-bind\-address=\fR\fI\s-1ADDRESS\s0\fR" 4
327.IX Item "--bind-address=ADDRESS"
328When making client \s-1TCP/IP\s0 connections, \f(CW\*(C`bind()\*(C'\fR to \fI\s-1ADDRESS\s0\fR on
329the local machine. \fI\s-1ADDRESS\s0\fR may be specified as a hostname or \s-1IP\s0
330address. This option can be useful if your machine is bound to multiple
331IPs.
332.IP "\fB\-t\fR \fInumber\fR" 4
333.IX Item "-t number"
334.PD 0
335.IP "\fB\-\-tries=\fR\fInumber\fR" 4
336.IX Item "--tries=number"
337.PD
338Set number of retries to \fInumber\fR. Specify 0 or \fBinf\fR for
339infinite retrying.
340.IP "\fB\-O\fR \fIfile\fR" 4
341.IX Item "-O file"
342.PD 0
343.IP "\fB\-\-output\-document=\fR\fIfile\fR" 4
344.IX Item "--output-document=file"
345.PD
346The documents will not be written to the appropriate files, but all will
347be concatenated together and written to \fIfile\fR. If \fIfile\fR
348already exists, it will be overwritten. If the \fIfile\fR is \fB\-\fR,
349the documents will be written to standard output. Including this option
350automatically sets the number of tries to 1.
351.IP "\fB\-nc\fR" 4
352.IX Item "-nc"
353.PD 0
354.IP "\fB\-\-no\-clobber\fR" 4
355.IX Item "--no-clobber"
356.PD
357If a file is downloaded more than once in the same directory, Wget's
358behavior depends on a few options, including \fB\-nc\fR. In certain
359cases, the local file will be \fIclobbered\fR, or overwritten, upon
360repeated download. In other cases it will be preserved.
361.Sp
362When running Wget without \fB\-N\fR, \fB\-nc\fR, or \fB\-r\fR,
363downloading the same file in the same directory will result in the
364original copy of \fIfile\fR being preserved and the second copy being
365named \fIfile\fR\fB.1\fR. If that file is downloaded yet again, the
366third copy will be named \fIfile\fR\fB.2\fR, and so on. When
367\&\fB\-nc\fR is specified, this behavior is suppressed, and Wget will
368refuse to download newer copies of \fIfile\fR. Therefore,
369``\f(CW\*(C`no\-clobber\*(C'\fR'' is actually a misnomer in this mode\-\-\-it's not
370clobbering that's prevented (as the numeric suffixes were already
371preventing clobbering), but rather the multiple version saving that's
372prevented.
373.Sp
374When running Wget with \fB\-r\fR, but without \fB\-N\fR or \fB\-nc\fR,
375re-downloading a file will result in the new copy simply overwriting the
376old. Adding \fB\-nc\fR will prevent this behavior, instead causing the
377original version to be preserved and any newer copies on the server to
378be ignored.
379.Sp
380When running Wget with \fB\-N\fR, with or without \fB\-r\fR, the
381decision as to whether or not to download a newer copy of a file depends
382on the local and remote timestamp and size of the file. \fB\-nc\fR may not be specified at the same
383time as \fB\-N\fR.
384.Sp
385Note that when \fB\-nc\fR is specified, files with the suffixes
386\&\fB.html\fR or (yuck) \fB.htm\fR will be loaded from the local disk
387and parsed as if they had been retrieved from the Web.
388.IP "\fB\-c\fR" 4
389.IX Item "-c"
390.PD 0
391.IP "\fB\-\-continue\fR" 4
392.IX Item "--continue"
393.PD
394Continue getting a partially-downloaded file. This is useful when you
395want to finish up a download started by a previous instance of Wget, or
396by another program. For instance:
397.Sp
398.Vb 1
399\& wget \-c ftp://sunsite.doc.ic.ac.uk/ls\-lR.Z
400.Ve
401.Sp
402If there is a file named \fIls\-lR.Z\fR in the current directory, Wget
403will assume that it is the first portion of the remote file, and will
404ask the server to continue the retrieval from an offset equal to the
405length of the local file.
406.Sp
407Note that you don't need to specify this option if you just want the
408current invocation of Wget to retry downloading a file should the
409connection be lost midway through. This is the default behavior.
410\&\fB\-c\fR only affects resumption of downloads started \fIprior\fR to
411this invocation of Wget, and whose local files are still sitting around.
412.Sp
413Without \fB\-c\fR, the previous example would just download the remote
414file to \fIls\-lR.Z.1\fR, leaving the truncated \fIls\-lR.Z\fR file
415alone.
416.Sp
417Beginning with Wget 1.7, if you use \fB\-c\fR on a non-empty file, and
418it turns out that the server does not support continued downloading,
419Wget will refuse to start the download from scratch, which would
420effectively ruin existing contents. If you really want the download to
421start from scratch, remove the file.
422.Sp
423Also beginning with Wget 1.7, if you use \fB\-c\fR on a file which is of
424equal size as the one on the server, Wget will refuse to download the
425file and print an explanatory message. The same happens when the file
426is smaller on the server than locally (presumably because it was changed
427on the server since your last download attempt)\-\-\-because ``continuing''
428is not meaningful, no download occurs.
429.Sp
430On the other side of the coin, while using \fB\-c\fR, any file that's
431bigger on the server than locally will be considered an incomplete
432download and only \f(CW\*(C`(length(remote) \- length(local))\*(C'\fR bytes will be
433downloaded and tacked onto the end of the local file. This behavior can
434be desirable in certain cases\-\-\-for instance, you can use \fBwget \-c\fR
435to download just the new portion that's been appended to a data
436collection or log file.
437.Sp
438However, if the file is bigger on the server because it's been
439\&\fIchanged\fR, as opposed to just \fIappended\fR to, you'll end up
440with a garbled file. Wget has no way of verifying that the local file
441is really a valid prefix of the remote file. You need to be especially
442careful of this when using \fB\-c\fR in conjunction with \fB\-r\fR,
443since every file will be considered as an \*(L"incomplete download\*(R" candidate.
444.Sp
445Another instance where you'll get a garbled file if you try to use
446\&\fB\-c\fR is if you have a lame \s-1HTTP\s0 proxy that inserts a
447``transfer interrupted'' string into the local file. In the future a
448``rollback'' option may be added to deal with this case.
449.Sp
450Note that \fB\-c\fR only works with \s-1FTP\s0 servers and with \s-1HTTP\s0
451servers that support the \f(CW\*(C`Range\*(C'\fR header.
452.IP "\fB\-\-dot\-style=\fR\fIstyle\fR" 4
453.IX Item "--dot-style=style"
454Set the retrieval style to \fIstyle\fR. Wget traces the retrieval of
455each document by printing dots on the screen, each dot representing a
456fixed amount of retrieved data. Any number of dots may be separated in
457a \fIcluster\fR, to make counting easier. This option allows you to
458choose one of the pre-defined styles, determining the number of bytes
459represented by a dot, the number of dots in a cluster, and the number of
460dots on the line.
461.Sp
462With the \f(CW\*(C`default\*(C'\fR style each dot represents 1K, there are ten dots
463in a cluster and 50 dots in a line. The \f(CW\*(C`binary\*(C'\fR style has a more
464``computer''\-like orientation\-\-\-8K dots, 16\-dots clusters and 48 dots
465per line (which makes for 384K lines). The \f(CW\*(C`mega\*(C'\fR style is
466suitable for downloading very large files\-\-\-each dot represents 64K
467retrieved, there are eight dots in a cluster, and 48 dots on each line
468(so each line contains 3M). The \f(CW\*(C`micro\*(C'\fR style is exactly the
469reverse; it is suitable for downloading small files, with 128\-byte dots,
4708 dots per cluster, and 48 dots (6K) per line.
471.IP "\fB\-N\fR" 4
472.IX Item "-N"
473.PD 0
474.IP "\fB\-\-timestamping\fR" 4
475.IX Item "--timestamping"
476.PD
477Turn on time-stamping.
478.IP "\fB\-S\fR" 4
479.IX Item "-S"
480.PD 0
481.IP "\fB\-\-server\-response\fR" 4
482.IX Item "--server-response"
483.PD
484Print the headers sent by \s-1HTTP\s0 servers and responses sent by
485\&\s-1FTP\s0 servers.
486.IP "\fB\-\-spider\fR" 4
487.IX Item "--spider"
488When invoked with this option, Wget will behave as a Web \fIspider\fR,
489which means that it will not download the pages, just check that they
490are there. You can use it to check your bookmarks, e.g. with:
491.Sp
492.Vb 1
493\& wget \-\-spider \-\-force\-html \-i bookmarks.html
494.Ve
495.Sp
496This feature needs much more work for Wget to get close to the
497functionality of real \s-1WWW\s0 spiders.
498.IP "\fB\-T seconds\fR" 4
499.IX Item "-T seconds"
500.PD 0
501.IP "\fB\-\-timeout=\fR\fIseconds\fR" 4
502.IX Item "--timeout=seconds"
503.PD
504Set the read timeout to \fIseconds\fR seconds. Whenever a network read
505is issued, the file descriptor is checked for a timeout, which could
506otherwise leave a pending connection (uninterrupted read). The default
507timeout is 900 seconds (fifteen minutes). Setting timeout to 0 will
508disable checking for timeouts.
509.Sp
510Please do not lower the default timeout value with this option unless
511you know what you are doing.
512.IP "\fB\-w\fR \fIseconds\fR" 4
513.IX Item "-w seconds"
514.PD 0
515.IP "\fB\-\-wait=\fR\fIseconds\fR" 4
516.IX Item "--wait=seconds"
517.PD
518Wait the specified number of seconds between the retrievals. Use of
519this option is recommended, as it lightens the server load by making the
520requests less frequent. Instead of in seconds, the time can be
521specified in minutes using the \f(CW\*(C`m\*(C'\fR suffix, in hours using \f(CW\*(C`h\*(C'\fR
522suffix, or in days using \f(CW\*(C`d\*(C'\fR suffix.
523.Sp
524Specifying a large value for this option is useful if the network or the
525destination host is down, so that Wget can wait long enough to
526reasonably expect the network error to be fixed before the retry.
527.IP "\fB\-\-waitretry=\fR\fIseconds\fR" 4
528.IX Item "--waitretry=seconds"
529If you don't want Wget to wait between \fIevery\fR retrieval, but only
530between retries of failed downloads, you can use this option. Wget will
531use \fIlinear backoff\fR, waiting 1 second after the first failure on a
532given file, then waiting 2 seconds after the second failure on that
533file, up to the maximum number of \fIseconds\fR you specify. Therefore,
534a value of 10 will actually make Wget wait up to (1 + 2 + ... + 10) = 55
535seconds per file.
536.Sp
537Note that this option is turned on by default in the global
538\&\fIwgetrc\fR file.
539.IP "\fB\-Y on/off\fR" 4
540.IX Item "-Y on/off"
541.PD 0
542.IP "\fB\-\-proxy=on/off\fR" 4
543.IX Item "--proxy=on/off"
544.PD
545Turn proxy support on or off. The proxy is on by default if the
546appropriate environment variable is defined.
547.IP "\fB\-Q\fR \fIquota\fR" 4
548.IX Item "-Q quota"
549.PD 0
550.IP "\fB\-\-quota=\fR\fIquota\fR" 4
551.IX Item "--quota=quota"
552.PD
553Specify download quota for automatic retrievals. The value can be
554specified in bytes (default), kilobytes (with \fBk\fR suffix), or
555megabytes (with \fBm\fR suffix).
556.Sp
557Note that quota will never affect downloading a single file. So if you
558specify \fBwget \-Q10k ftp://wuarchive.wustl.edu/ls\-lR.gz\fR, all of the
559\&\fIls\-lR.gz\fR will be downloaded. The same goes even when several
560URLs are specified on the command-line. However, quota is
561respected when retrieving either recursively, or from an input file.
562Thus you may safely type \fBwget \-Q2m \-i sites\fR\-\-\-download will be
563aborted when the quota is exceeded.
564.Sp
565Setting quota to 0 or to \fBinf\fR unlimits the download quota.
566.SS "Directory Options"
567.IX Subsection "Directory Options"
568.IP "\fB\-nd\fR" 4
569.IX Item "-nd"
570.PD 0
571.IP "\fB\-\-no\-directories\fR" 4
572.IX Item "--no-directories"
573.PD
574Do not create a hierarchy of directories when retrieving recursively.
575With this option turned on, all files will get saved to the current
576directory, without clobbering (if a name shows up more than once, the
577filenames will get extensions \fB.n\fR).
578.IP "\fB\-x\fR" 4
579.IX Item "-x"
580.PD 0
581.IP "\fB\-\-force\-directories\fR" 4
582.IX Item "--force-directories"
583.PD
584The opposite of \fB\-nd\fR\-\-\-create a hierarchy of directories, even if
585one would not have been created otherwise. E.g. \fBwget \-x
586http://fly.srk.fer.hr/robots.txt\fR will save the downloaded file to
587\&\fIfly.srk.fer.hr/robots.txt\fR.
588.IP "\fB\-nH\fR" 4
589.IX Item "-nH"
590.PD 0
591.IP "\fB\-\-no\-host\-directories\fR" 4
592.IX Item "--no-host-directories"
593.PD
594Disable generation of host-prefixed directories. By default, invoking
595Wget with \fB\-r http://fly.srk.fer.hr/\fR will create a structure of
596directories beginning with \fIfly.srk.fer.hr/\fR. This option disables
597such behavior.
598.IP "\fB\-\-cut\-dirs=\fR\fInumber\fR" 4
599.IX Item "--cut-dirs=number"
600Ignore \fInumber\fR directory components. This is useful for getting a
601fine-grained control over the directory where recursive retrieval will
602be saved.
603.Sp
604Take, for example, the directory at
605\&\fBftp://ftp.xemacs.org/pub/xemacs/\fR. If you retrieve it with
606\&\fB\-r\fR, it will be saved locally under
607\&\fIftp.xemacs.org/pub/xemacs/\fR. While the \fB\-nH\fR option can
608remove the \fIftp.xemacs.org/\fR part, you are still stuck with
609\&\fIpub/xemacs\fR. This is where \fB\-\-cut\-dirs\fR comes in handy; it
610makes Wget not ``see'' \fInumber\fR remote directory components. Here
611are several examples of how \fB\-\-cut\-dirs\fR option works.
612.Sp
613.Vb 4
614\& No options \-> ftp.xemacs.org/pub/xemacs/
615\& \-nH \-> pub/xemacs/
616\& \-nH \-\-cut\-dirs=1 \-> xemacs/
617\& \-nH \-\-cut\-dirs=2 \-> .
618\&
619\& \-\-cut\-dirs=1 \-> ftp.xemacs.org/xemacs/
620\& ...
621.Ve
622.Sp
623If you just want to get rid of the directory structure, this option is
624similar to a combination of \fB\-nd\fR and \fB\-P\fR. However, unlike
625\&\fB\-nd\fR, \fB\-\-cut\-dirs\fR does not lose subdirectories\-\-\-for
626instance, with \fB\-nH \-\-cut\-dirs=1\fR, a \fIbeta/\fR subdirectory will
627be placed in \fIxemacs/beta\fR, as one would expect.
628.IP "\fB\-P\fR \fIprefix\fR" 4
629.IX Item "-P prefix"
630.PD 0
631.IP "\fB\-\-directory\-prefix=\fR\fIprefix\fR" 4
632.IX Item "--directory-prefix=prefix"
633.PD
634Set directory prefix to \fIprefix\fR. The \fIdirectory prefix\fR is the
635directory where all other files and subdirectories will be saved to,
636i.e. the top of the retrieval tree. The default is \fB.\fR (the
637current directory).
638.SS "\s-1HTTP\s0 Options"
639.IX Subsection "HTTP Options"
640.IP "\fB\-E\fR" 4
641.IX Item "-E"
642.PD 0
643.IP "\fB\-\-html\-extension\fR" 4
644.IX Item "--html-extension"
645.PD
646If a file of type \fBtext/html\fR is downloaded and the \s-1URL\s0 does not
647end with the regexp \fB\e.[Hh][Tt][Mm][Ll]?\fR, this option will cause
648the suffix \fB.html\fR to be appended to the local filename. This is
649useful, for instance, when you're mirroring a remote site that uses
650\&\fB.asp\fR pages, but you want the mirrored pages to be viewable on
651your stock Apache server. Another good use for this is when you're
652downloading the output of CGIs. A \s-1URL\s0 like
653\&\fBhttp://site.com/article.cgi?25\fR will be saved as
654\&\fIarticle.cgi?25.html\fR.
655.Sp
656Note that filenames changed in this way will be re-downloaded every time
657you re-mirror a site, because Wget can't tell that the local
658\&\fI\fIX\fI.html\fR file corresponds to remote \s-1URL \s0\fIX\fR (since
659it doesn't yet know that the \s-1URL\s0 produces output of type
660\&\fBtext/html\fR). To prevent this re-downloading, you must use
661\&\fB\-k\fR and \fB\-K\fR so that the original version of the file will be
662saved as \fI\fIX\fI.orig\fR.
663.IP "\fB\-\-http\-user=\fR\fIuser\fR" 4
664.IX Item "--http-user=user"
665.PD 0
666.IP "\fB\-\-http\-passwd=\fR\fIpassword\fR" 4
667.IX Item "--http-passwd=password"
668.PD
669Specify the username \fIuser\fR and password \fIpassword\fR on an
670\&\s-1HTTP\s0 server. According to the type of the challenge, Wget will
671encode them using either the \f(CW\*(C`basic\*(C'\fR (insecure) or the
672\&\f(CW\*(C`digest\*(C'\fR authentication scheme.
673.Sp
674Another way to specify username and password is in the \s-1URL\s0 itself. For more information about security issues with
675Wget, see the ``Security Considerations'' section of the Wget Info manual.
676.IP "\fB\-C on/off\fR" 4
677.IX Item "-C on/off"
678.PD 0
679.IP "\fB\-\-cache=on/off\fR" 4
680.IX Item "--cache=on/off"
681.PD
682When set to off, disable server-side cache. In this case, Wget will
683send the remote server an appropriate directive (\fBPragma:
684no-cache\fR) to get the file from the remote service, rather than
685returning the cached version. This is especially useful for retrieving
686and flushing out-of-date documents on proxy servers.
687.Sp
688Caching is allowed by default.
689.IP "\fB\-\-cookies=on/off\fR" 4
690.IX Item "--cookies=on/off"
691When set to off, disable the use of cookies. Cookies are a mechanism
692for maintaining server-side state. The server sends the client a cookie
693using the \f(CW\*(C`Set\-Cookie\*(C'\fR header, and the client responds with the
694same cookie upon further requests. Since cookies allow the server
695owners to keep track of visitors and for sites to exchange this
696information, some consider them a breach of privacy. The default is to
697use cookies; however, \fIstoring\fR cookies is not on by default.
698.IP "\fB\-\-load\-cookies\fR \fIfile\fR" 4
699.IX Item "--load-cookies file"
700Load cookies from \fIfile\fR before the first \s-1HTTP\s0 retrieval. The
701format of \fIfile\fR is one used by Netscape and Mozilla, at least their
702Unix version.
703.IP "\fB\-\-save\-cookies\fR \fIfile\fR" 4
704.IX Item "--save-cookies file"
705Save cookies to \fIfile\fR at the end of the session. Cookies whose
706expiry time is not specified, or those that have already expired, are
707not saved.
708.IP "\fB\-\-ignore\-length\fR" 4
709.IX Item "--ignore-length"
710Unfortunately, some \s-1HTTP\s0 servers (\s-1CGI\s0 programs, to be more
711precise) send out bogus \f(CW\*(C`Content\-Length\*(C'\fR headers, which makes Wget
712go wild, as it thinks not all the document was retrieved. You can spot
713this syndrome if Wget retries getting the same document again and again,
714each time claiming that the (otherwise normal) connection has closed on
715the very same byte.
716.Sp
717With this option, Wget will ignore the \f(CW\*(C`Content\-Length\*(C'\fR header\-\-\-as
718if it never existed.
719.IP "\fB\-\-header=\fR\fIadditional-header\fR" 4
720.IX Item "--header=additional-header"
721Define an \fIadditional-header\fR to be passed to the \s-1HTTP\s0 servers.
722Headers must contain a \fB:\fR preceded by one or more non-blank
723characters, and must not contain newlines.
724.Sp
725You may define more than one additional header by specifying
726\&\fB\-\-header\fR more than once.
727.Sp
728.Vb 3
729\& wget \-\-header=\*(AqAccept\-Charset: iso\-8859\-2\*(Aq \e
730\& \-\-header=\*(AqAccept\-Language: hr\*(Aq \e
731\& http://fly.srk.fer.hr/
732.Ve
733.Sp
734Specification of an empty string as the header value will clear all
735previous user-defined headers.
736.IP "\fB\-\-proxy\-user=\fR\fIuser\fR" 4
737.IX Item "--proxy-user=user"
738.PD 0
739.IP "\fB\-\-proxy\-passwd=\fR\fIpassword\fR" 4
740.IX Item "--proxy-passwd=password"
741.PD
742Specify the username \fIuser\fR and password \fIpassword\fR for
743authentication on a proxy server. Wget will encode them using the
744\&\f(CW\*(C`basic\*(C'\fR authentication scheme.
745.IP "\fB\-\-referer=\fR\fIurl\fR" 4
746.IX Item "--referer=url"
747Include `Referer: \fIurl\fR' header in \s-1HTTP\s0 request. Useful for
748retrieving documents with server-side processing that assume they are
749always being retrieved by interactive web browsers and only come out
750properly when Referer is set to one of the pages that point to them.
751.IP "\fB\-s\fR" 4
752.IX Item "-s"
753.PD 0
754.IP "\fB\-\-save\-headers\fR" 4
755.IX Item "--save-headers"
756.PD
757Save the headers sent by the \s-1HTTP\s0 server to the file, preceding the
758actual contents, with an empty line as the separator.
759.IP "\fB\-U\fR \fIagent-string\fR" 4
760.IX Item "-U agent-string"
761.PD 0
762.IP "\fB\-\-user\-agent=\fR\fIagent-string\fR" 4
763.IX Item "--user-agent=agent-string"
764.PD
765Identify as \fIagent-string\fR to the \s-1HTTP\s0 server.
766.Sp
767The \s-1HTTP\s0 protocol allows the clients to identify themselves using a
768\&\f(CW\*(C`User\-Agent\*(C'\fR header field. This enables distinguishing the
769\&\s-1WWW\s0 software, usually for statistical purposes or for tracing of
770protocol violations. Wget normally identifies as
771\&\fBWget/\fR\fIversion\fR, \fIversion\fR being the current version
772number of Wget.
773.Sp
774However, some sites have been known to impose the policy of tailoring
775the output according to the \f(CW\*(C`User\-Agent\*(C'\fR\-supplied information.
776While conceptually this is not such a bad idea, it has been abused by
777servers denying information to clients other than \f(CW\*(C`Mozilla\*(C'\fR or
778Microsoft \f(CW\*(C`Internet Explorer\*(C'\fR. This option allows you to change
779the \f(CW\*(C`User\-Agent\*(C'\fR line issued by Wget. Use of this option is
780discouraged, unless you really know what you are doing.
781.SS "\s-1FTP\s0 Options"
782.IX Subsection "FTP Options"
783.IP "\fB\-nr\fR" 4
784.IX Item "-nr"
785.PD 0
786.IP "\fB\-\-dont\-remove\-listing\fR" 4
787.IX Item "--dont-remove-listing"
788.PD
789Don't remove the temporary \fI.listing\fR files generated by \s-1FTP\s0
790retrievals. Normally, these files contain the raw directory listings
791received from \s-1FTP\s0 servers. Not removing them can be useful for
792debugging purposes, or when you want to be able to easily check on the
793contents of remote server directories (e.g. to verify that a mirror
794you're running is complete).
795.Sp
796Note that even though Wget writes to a known filename for this file,
797this is not a security hole in the scenario of a user making
798\&\fI.listing\fR a symbolic link to \fI/etc/passwd\fR or something and
799asking \f(CW\*(C`root\*(C'\fR to run Wget in his or her directory. Depending on
800the options used, either Wget will refuse to write to \fI.listing\fR,
801making the globbing/recursion/time\-stamping operation fail, or the
802symbolic link will be deleted and replaced with the actual
803\&\fI.listing\fR file, or the listing will be written to a
804\&\fI.listing.\fInumber\fI\fR file.
805.Sp
806Even though this situation isn't a problem, \f(CW\*(C`root\*(C'\fR should
807never run Wget in a non-trusted user's directory. A user could do
808something as simple as linking \fIindex.html\fR to \fI/etc/passwd\fR
809and asking \f(CW\*(C`root\*(C'\fR to run Wget with \fB\-N\fR or \fB\-r\fR so the file
810will be overwritten.
811.IP "\fB\-g on/off\fR" 4
812.IX Item "-g on/off"
813.PD 0
814.IP "\fB\-\-glob=on/off\fR" 4
815.IX Item "--glob=on/off"
816.PD
817Turn \s-1FTP\s0 globbing on or off. Globbing means you may use the
818shell-like special characters (\fIwildcards\fR), like \fB*\fR,
819\&\fB?\fR, \fB[\fR and \fB]\fR to retrieve more than one file from the
820same directory at once, like:
821.Sp
822.Vb 1
823\& wget ftp://gnjilux.srk.fer.hr/*.msg
824.Ve
825.Sp
826By default, globbing will be turned on if the \s-1URL\s0 contains a
827globbing character. This option may be used to turn globbing on or off
828permanently.
829.Sp
830You may have to quote the \s-1URL\s0 to protect it from being expanded by
831your shell. Globbing makes Wget look for a directory listing, which is
832system-specific. This is why it currently works only with Unix \s-1FTP\s0
833servers (and the ones emulating Unix \f(CW\*(C`ls\*(C'\fR output).
834.IP "\fB\-\-passive\-ftp\fR" 4
835.IX Item "--passive-ftp"
836Use the \fIpassive\fR \s-1FTP\s0 retrieval scheme, in which the client
837initiates the data connection. This is sometimes required for \s-1FTP\s0
838to work behind firewalls.
839.IP "\fB\-\-retr\-symlinks\fR" 4
840.IX Item "--retr-symlinks"
841Usually, when retrieving \s-1FTP\s0 directories recursively and a symbolic
842link is encountered, the linked-to file is not downloaded. Instead, a
843matching symbolic link is created on the local filesystem. The
844pointed-to file will not be downloaded unless this recursive retrieval
845would have encountered it separately and downloaded it anyway.
846.Sp
847When \fB\-\-retr\-symlinks\fR is specified, however, symbolic links are
848traversed and the pointed-to files are retrieved. At this time, this
849option does not cause Wget to traverse symlinks to directories and
850recurse through them, but in the future it should be enhanced to do
851this.
852.Sp
853Note that when retrieving a file (not a directory) because it was
854specified on the commandline, rather than because it was recursed to,
855this option has no effect. Symbolic links are always traversed in this
856case.
857.SS "Recursive Retrieval Options"
858.IX Subsection "Recursive Retrieval Options"
859.IP "\fB\-r\fR" 4
860.IX Item "-r"
861.PD 0
862.IP "\fB\-\-recursive\fR" 4
863.IX Item "--recursive"
864.PD
865Turn on recursive retrieving.
866.IP "\fB\-l\fR \fIdepth\fR" 4
867.IX Item "-l depth"
868.PD 0
869.IP "\fB\-\-level=\fR\fIdepth\fR" 4
870.IX Item "--level=depth"
871.PD
872Specify recursion maximum depth level \fIdepth\fR. The default maximum depth is 5.
873.IP "\fB\-\-delete\-after\fR" 4
874.IX Item "--delete-after"
875This option tells Wget to delete every single file it downloads,
876\&\fIafter\fR having done so. It is useful for pre-fetching popular
877pages through a proxy, e.g.:
878.Sp
879.Vb 1
880\& wget \-r \-nd \-\-delete\-after http://whatever.com/~popular/page/
881.Ve
882.Sp
883The \fB\-r\fR option is to retrieve recursively, and \fB\-nd\fR to not
884create directories.
885.Sp
886Note that \fB\-\-delete\-after\fR deletes files on the local machine. It
887does not issue the \fB\s-1DELE\s0\fR command to remote \s-1FTP\s0 sites, for
888instance. Also note that when \fB\-\-delete\-after\fR is specified,
889\&\fB\-\-convert\-links\fR is ignored, so \fB.orig\fR files are simply not
890created in the first place.
891.IP "\fB\-k\fR" 4
892.IX Item "-k"
893.PD 0
894.IP "\fB\-\-convert\-links\fR" 4
895.IX Item "--convert-links"
896.PD
897After the download is complete, convert the links in the document to
898make them suitable for local viewing. This affects not only the visible
899hyperlinks, but any part of the document that links to external content,
900such as embedded images, links to style sheets, hyperlinks to non-HTML
901content, etc.
902.Sp
903Each link will be changed in one of the two ways:
904.RS 4
905.IP "\(bu" 4
906The links to files that have been downloaded by Wget will be changed to
907refer to the file they point to as a relative link.
908.Sp
909Example: if the downloaded file \fI/foo/doc.html\fR links to
910\&\fI/bar/img.gif\fR, also downloaded, then the link in \fIdoc.html\fR
911will be modified to point to \fB../bar/img.gif\fR. This kind of
912transformation works reliably for arbitrary combinations of directories.
913.IP "\(bu" 4
914The links to files that have not been downloaded by Wget will be changed
915to include host name and absolute path of the location they point to.
916.Sp
917Example: if the downloaded file \fI/foo/doc.html\fR links to
918\&\fI/bar/img.gif\fR (or to \fI../bar/img.gif\fR), then the link in
919\&\fIdoc.html\fR will be modified to point to
920\&\fIhttp://\fIhostname\fI/bar/img.gif\fR.
921.RE
922.RS 4
923.Sp
924Because of this, local browsing works reliably: if a linked file was
925downloaded, the link will refer to its local name; if it was not
926downloaded, the link will refer to its full Internet address rather than
927presenting a broken link. The fact that the former links are converted
928to relative links ensures that you can move the downloaded hierarchy to
929another directory.
930.Sp
931Note that only at the end of the download can Wget know which links have
932been downloaded. Because of that, the work done by \fB\-k\fR will be
933performed at the end of all the downloads.
934.RE
935.IP "\fB\-K\fR" 4
936.IX Item "-K"
937.PD 0
938.IP "\fB\-\-backup\-converted\fR" 4
939.IX Item "--backup-converted"
940.PD
941When converting a file, back up the original version with a \fB.orig\fR
942suffix. Affects the behavior of \fB\-N\fR.
943.IP "\fB\-m\fR" 4
944.IX Item "-m"
945.PD 0
946.IP "\fB\-\-mirror\fR" 4
947.IX Item "--mirror"
948.PD
949Turn on options suitable for mirroring. This option turns on recursion
950and time-stamping, sets infinite recursion depth and keeps \s-1FTP\s0
951directory listings. It is currently equivalent to
952\&\fB\-r \-N \-l inf \-nr\fR.
953.IP "\fB\-p\fR" 4
954.IX Item "-p"
955.PD 0
956.IP "\fB\-\-page\-requisites\fR" 4
957.IX Item "--page-requisites"
958.PD
959This option causes Wget to download all the files that are necessary to
960properly display a given \s-1HTML\s0 page. This includes such things as
961inlined images, sounds, and referenced stylesheets.
962.Sp
963Ordinarily, when downloading a single \s-1HTML\s0 page, any requisite documents
964that may be needed to display it properly are not downloaded. Using
965\&\fB\-r\fR together with \fB\-l\fR can help, but since Wget does not
966ordinarily distinguish between external and inlined documents, one is
967generally left with ``leaf documents'' that are missing their
968requisites.
969.Sp
970For instance, say document \fI1.html\fR contains an \f(CW\*(C`<IMG>\*(C'\fR tag
971referencing \fI1.gif\fR and an \f(CW\*(C`<A>\*(C'\fR tag pointing to external
972document \fI2.html\fR. Say that \fI2.html\fR is similar but that its
973image is \fI2.gif\fR and it links to \fI3.html\fR. Say this
974continues up to some arbitrarily high number.
975.Sp
976If one executes the command:
977.Sp
978.Vb 1
979\& wget \-r \-l 2 http://\fIsite\fP/1.html
980.Ve
981.Sp
982then \fI1.html\fR, \fI1.gif\fR, \fI2.html\fR, \fI2.gif\fR, and
983\&\fI3.html\fR will be downloaded. As you can see, \fI3.html\fR is
984without its requisite \fI3.gif\fR because Wget is simply counting the
985number of hops (up to 2) away from \fI1.html\fR in order to determine
986where to stop the recursion. However, with this command:
987.Sp
988.Vb 1
989\& wget \-r \-l 2 \-p http://\fIsite\fP/1.html
990.Ve
991.Sp
992all the above files \fIand\fR \fI3.html\fR's requisite \fI3.gif\fR
993will be downloaded. Similarly,
994.Sp
995.Vb 1
996\& wget \-r \-l 1 \-p http://\fIsite\fP/1.html
997.Ve
998.Sp
999will cause \fI1.html\fR, \fI1.gif\fR, \fI2.html\fR, and \fI2.gif\fR
1000to be downloaded. One might think that:
1001.Sp
1002.Vb 1
1003\& wget \-r \-l 0 \-p http://\fIsite\fP/1.html
1004.Ve
1005.Sp
1006would download just \fI1.html\fR and \fI1.gif\fR, but unfortunately
1007this is not the case, because \fB\-l 0\fR is equivalent to
1008\&\fB\-l inf\fR\-\-\-that is, infinite recursion. To download a single \s-1HTML\s0
1009page (or a handful of them, all specified on the commandline or in a
1010\&\fB\-i\fR \s-1URL\s0 input file) and its (or their) requisites, simply leave off
1011\&\fB\-r\fR and \fB\-l\fR:
1012.Sp
1013.Vb 1
1014\& wget \-p http://\fIsite\fP/1.html
1015.Ve
1016.Sp
1017Note that Wget will behave as if \fB\-r\fR had been specified, but only
1018that single page and its requisites will be downloaded. Links from that
1019page to external documents will not be followed. Actually, to download
1020a single page and all its requisites (even if they exist on separate
1021websites), and make sure the lot displays properly locally, this author
1022likes to use a few options in addition to \fB\-p\fR:
1023.Sp
1024.Vb 1
1025\& wget \-E \-H \-k \-K \-nh \-p http://\fIsite\fP/\fIdocument\fP
1026.Ve
1027.Sp
1028In one case you'll need to add a couple more options. If \fIdocument\fR
1029is a \f(CW\*(C`<FRAMESET>\*(C'\fR page, the \*(L"one more hop\*(R" that \fB\-p\fR gives you
1030won't be enough\-\-\-you'll get the \f(CW\*(C`<FRAME>\*(C'\fR pages that are
1031referenced, but you won't get \fItheir\fR requisites. Therefore, in
1032this case you'll need to add \fB\-r \-l1\fR to the commandline. The
1033\&\fB\-r \-l1\fR will recurse from the \f(CW\*(C`<FRAMESET>\*(C'\fR page to the
1034\&\f(CW\*(C`<FRAME>\*(C'\fR pages, and the \fB\-p\fR will get their requisites. If
1035you're already using a recursion level of 1 or more, you'll need to up
1036it by one. In the future, \fB\-p\fR may be made smarter so that it'll
1037do \*(L"two more hops\*(R" in the case of a \f(CW\*(C`<FRAMESET>\*(C'\fR page.
1038.Sp
1039To finish off this topic, it's worth knowing that Wget's idea of an
1040external document link is any \s-1URL\s0 specified in an \f(CW\*(C`<A>\*(C'\fR tag, an
1041\&\f(CW\*(C`<AREA>\*(C'\fR tag, or a \f(CW\*(C`<LINK>\*(C'\fR tag other than \f(CW\*(C`<LINK
1042REL="stylesheet">\*(C'\fR.
1043.SS "Recursive Accept/Reject Options"
1044.IX Subsection "Recursive Accept/Reject Options"
1045.IP "\fB\-A\fR \fIacclist\fR \fB\-\-accept\fR \fIacclist\fR" 4
1046.IX Item "-A acclist --accept acclist"
1047.PD 0
1048.IP "\fB\-R\fR \fIrejlist\fR \fB\-\-reject\fR \fIrejlist\fR" 4
1049.IX Item "-R rejlist --reject rejlist"
1050.PD
1051Specify comma-separated lists of file name suffixes or patterns to
1052accept or reject.
1053.IP "\fB\-D\fR \fIdomain-list\fR" 4
1054.IX Item "-D domain-list"
1055.PD 0
1056.IP "\fB\-\-domains=\fR\fIdomain-list\fR" 4
1057.IX Item "--domains=domain-list"
1058.PD
1059Set domains to be accepted and \s-1DNS\s0 looked-up, where
1060\&\fIdomain-list\fR is a comma-separated list. Note that it does
1061\&\fInot\fR turn on \fB\-H\fR. This option speeds things up, even if
1062only one host is spanned.
1063.IP "\fB\-\-exclude\-domains\fR \fIdomain-list\fR" 4
1064.IX Item "--exclude-domains domain-list"
1065Exclude the domains given in a comma-separated \fIdomain-list\fR from
1066DNS-lookup.
1067.IP "\fB\-\-follow\-ftp\fR" 4
1068.IX Item "--follow-ftp"
1069Follow \s-1FTP\s0 links from \s-1HTML\s0 documents. Without this option,
1070Wget will ignore all the \s-1FTP\s0 links.
1071.IP "\fB\-\-follow\-tags=\fR\fIlist\fR" 4
1072.IX Item "--follow-tags=list"
1073Wget has an internal table of \s-1HTML\s0 tag / attribute pairs that it
1074considers when looking for linked documents during a recursive
1075retrieval. If a user wants only a subset of those tags to be
1076considered, however, he or she should specify such tags in a
1077comma-separated \fIlist\fR with this option.
1078.IP "\fB\-G\fR \fIlist\fR" 4
1079.IX Item "-G list"
1080.PD 0
1081.IP "\fB\-\-ignore\-tags=\fR\fIlist\fR" 4
1082.IX Item "--ignore-tags=list"
1083.PD
1084This is the opposite of the \fB\-\-follow\-tags\fR option. To skip
1085certain \s-1HTML\s0 tags when recursively looking for documents to download,
1086specify them in a comma-separated \fIlist\fR.
1087.Sp
1088In the past, the \fB\-G\fR option was the best bet for downloading a
1089single page and its requisites, using a commandline like:
1090.Sp
1091.Vb 1
1092\& wget \-Ga,area \-H \-k \-K \-nh \-r http://\fIsite\fP/\fIdocument\fP
1093.Ve
1094.Sp
1095However, the author of this option came across a page with tags like
1096\&\f(CW\*(C`<LINK REL="home" HREF="/">\*(C'\fR and came to the realization that
1097\&\fB\-G\fR was not enough. One can't just tell Wget to ignore
1098\&\f(CW\*(C`<LINK>\*(C'\fR, because then stylesheets will not be downloaded. Now the
1099best bet for downloading a single page and its requisites is the
1100dedicated \fB\-\-page\-requisites\fR option.
1101.IP "\fB\-H\fR" 4
1102.IX Item "-H"
1103.PD 0
1104.IP "\fB\-\-span\-hosts\fR" 4
1105.IX Item "--span-hosts"
1106.PD
1107Enable spanning across hosts when doing recursive retrieving.
1108.IP "\fB\-L\fR" 4
1109.IX Item "-L"
1110.PD 0
1111.IP "\fB\-\-relative\fR" 4
1112.IX Item "--relative"
1113.PD
1114Follow relative links only. Useful for retrieving a specific home page
1115without any distractions, not even those from the same hosts.
1116.IP "\fB\-I\fR \fIlist\fR" 4
1117.IX Item "-I list"
1118.PD 0
1119.IP "\fB\-\-include\-directories=\fR\fIlist\fR" 4
1120.IX Item "--include-directories=list"
1121.PD
1122Specify a comma-separated list of directories you wish to follow when
1123downloading. Elements
1124of \fIlist\fR may contain wildcards.
1125.IP "\fB\-X\fR \fIlist\fR" 4
1126.IX Item "-X list"
1127.PD 0
1128.IP "\fB\-\-exclude\-directories=\fR\fIlist\fR" 4
1129.IX Item "--exclude-directories=list"
1130.PD
1131Specify a comma-separated list of directories you wish to exclude from
1132download. Elements of
1133\&\fIlist\fR may contain wildcards.
1134.IP "\fB\-nh\fR" 4
1135.IX Item "-nh"
1136.PD 0
1137.IP "\fB\-\-no\-host\-lookup\fR" 4
1138.IX Item "--no-host-lookup"
1139.PD
1140Disable the time-consuming \s-1DNS\s0 lookup of almost all hosts.
1141.IP "\fB\-np\fR" 4
1142.IX Item "-np"
1143.PD 0
1144.IP "\fB\-\-no\-parent\fR" 4
1145.IX Item "--no-parent"
1146.PD
1147Do not ever ascend to the parent directory when retrieving recursively.
1148This is a useful option, since it guarantees that only the files
1149\&\fIbelow\fR a certain hierarchy will be downloaded.
1150.SH "FILES"
1151.IX Header "FILES"
1152.IP "\fB/usr/local/etc/wgetrc\fR" 4
1153.IX Item "/usr/local/etc/wgetrc"
1154Default location of the \fIglobal\fR startup file.
1155.IP "\fB.wgetrc\fR" 4
1156.IX Item ".wgetrc"
1157User startup file.
1158.SH "BUGS"
1159.IX Header "BUGS"
1160You are welcome to send bug reports about \s-1GNU\s0 Wget to
1161<\f(CW\*(C`bug\-wget@gnu.org\*(C'\fR>.
1162.PP
1163Before actually submitting a bug report, please try to follow a few
1164simple guidelines.
1165.IP "1." 4
1166Please try to ascertain that the behaviour you see really is a bug. If
1167Wget crashes, it's a bug. If Wget does not behave as documented,
1168it's a bug. If things work strangely, but you are not sure about the way
1169they are supposed to work, it might well be a bug.
1170.IP "2." 4
1171Try to repeat the bug in as simple circumstances as possible. E.g. if
1172Wget crashes on \fBwget \-rLl0 \-t5 \-Y0 http://yoyodyne.com \-o
1173/tmp/log\fR, you should try to see if it will crash with a simpler set of
1174options.
1175.Sp
1176Also, while I will probably be interested to know the contents of your
1177\&\fI.wgetrc\fR file, just dumping it into the debug message is probably
1178a bad idea. Instead, you should first try to see if the bug repeats
1179with \fI.wgetrc\fR moved out of the way. Only if it turns out that
1180\&\fI.wgetrc\fR settings affect the bug, should you mail me the relevant
1181parts of the file.
1182.IP "3." 4
1183Please start Wget with \fB\-d\fR option and send the log (or the
1184relevant parts of it). If Wget was compiled without debug support,
1185recompile it. It is \fImuch\fR easier to trace bugs with debug support
1186on.
1187.IP "4." 4
1188If Wget has crashed, try to run it in a debugger, e.g. \f(CW\*(C`gdb \`which
1189wget\` core\*(C'\fR and type \f(CW\*(C`where\*(C'\fR to get the backtrace.
1190.IP "5." 4
1191Find where the bug is, fix it and send me the patches. :\-)
1192.SH "SEE ALSO"
1193.IX Header "SEE ALSO"
1194\&\s-1GNU\s0 Info entry for \fIwget\fR.
1195.SH "AUTHOR"
1196.IX Header "AUTHOR"
1197Originally written by Hrvoje Niksic <hniksic@arsdigita.com>.
1198.SH "COPYRIGHT"
1199.IX Header "COPYRIGHT"
1200Copyright (c) 1996, 1997, 1998, 2000, 2001 Free Software
1201Foundation, Inc.
1202.PP
1203Permission is granted to make and distribute verbatim copies of
1204this manual provided the copyright notice and this permission notice
1205are preserved on all copies.
1206.PP
1207Permission is granted to copy, distribute and/or modify this document
1208under the terms of the \s-1GNU\s0 Free Documentation License, Version 1.1 or
1209any later version published by the Free Software Foundation; with the
1210Invariant Sections being ``\s-1GNU\s0 General Public License'' and ``\s-1GNU\s0 Free
1211Documentation License'', with no Front-Cover Texts, and with no
1212Back-Cover Texts. A copy of the license is included in the section
1213entitled ``\s-1GNU\s0 Free Documentation License''.