From 6910a60de960b04100d13af855e1134772f70fef Mon Sep 17 00:00:00 2001 From: "Matteo Nastasi (mop)" Date: Mon, 25 Jun 2012 15:38:19 +0200 Subject: [PATCH] first commit of mod-proxy-fdpass using as package stub libapache2-mod-proxy-http package --- COPYING | 340 +++++ README | 17 + config.html | 254 ++++ debian/changelog | 5 + debian/compat | 1 + debian/conf/proxy_fdpass.load | 2 + debian/control | 15 + debian/copyright | 26 + debian/dirs | 2 + debian/docs | 3 + debian/install | 3 + debian/postinst | 68 + debian/prerm | 51 + debian/rules | 52 + faq.html | 74 + guide.html | 230 +++ mod_proxy_fdpass.c | 2463 +++++++++++++++++++++++++++++++++ mod_proxy_fdpass.h | 41 + proxy_fdpass.conf | 3 + 19 files changed, 3650 insertions(+) create mode 100644 COPYING create mode 100644 README create mode 100644 config.html create mode 100644 debian/changelog create mode 100644 debian/compat create mode 100644 debian/conf/proxy_fdpass.load create mode 100644 debian/control create mode 100644 debian/copyright create mode 100644 debian/dirs create mode 100644 debian/docs create mode 100644 debian/install create mode 100644 debian/postinst create mode 100644 debian/prerm create mode 100755 debian/rules create mode 100644 faq.html create mode 100644 guide.html create mode 100644 mod_proxy_fdpass.c create mode 100644 mod_proxy_fdpass.h create mode 100644 proxy_fdpass.conf diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..5b6e7c6 --- /dev/null +++ b/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. 
+ + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/README b/README new file mode 100644 index 0000000..a7f5e77 --- /dev/null +++ b/README @@ -0,0 +1,17 @@ +DOCUMENTATION for this module is at + http://apache.webthing.com/mod_proxy_fdpass/ + +UPGRADING: IMPORTANT NOTE + +If you are upgrading from mod_proxy_fdpass 2.x (or 1.x), you will need +some new configuration. You can Include proxy_fdpass.conf from this +bundle in your httpd.conf (or apache.conf) to use Version 3 as a +drop-in replacement for Version 2. + + +WINDOWS USERS: + +You may need to install some prerequisite libraries before you can +load mod_proxy_fdpass into apache. If you don't already have them, +see the README at + http://apache.webthing.com/mod_proxy_fdpass/windows/ diff --git a/config.html b/config.html new file mode 100644 index 0000000..59bef92 --- /dev/null +++ b/config.html @@ -0,0 +1,254 @@ + + + +mod_proxy_fdpass + + +
+

mod_proxy_fdpass: Configuration

+

mod_proxy_fdpass Version 2.4 (Sept 2004) and upwards. +Updates in Version 3 (Dec. 2006) are highlighted.

+

Configuration Directives

+

The following can be used anywhere in an httpd.conf +or included configuration file.

+
+
ProxyHTMLURLMap
+
+

Syntax: +ProxyHTMLURLMap from-pattern to-pattern [flags] [cond]

+

This is the key directive for rewriting HTML links. When parsing a document, +whenever a link target matches from-pattern, the matching +portion will be rewritten to to-pattern.
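+
+As an illustration only (the hostnames and paths below are hypothetical,
+not taken from the sample configuration), a reverse proxy publishing an
+internal server under /mirror/ might map its links back into the proxy's
+address space with:
+
+ProxyHTMLURLMap http://internal.example.com /mirror
+ProxyHTMLURLMap /intranet /mirror/intranet
+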

+

Starting at version 2.0, this supports a wider range of pattern-matching +and substitutions, including regular expression search and replace, +controlled by the optional third flags argument. +

+

Starting at version 3.0, this also supports environment variable +interpolation using the V and v flags, and rules may apply conditionally +based on an environment variable. Note that interpolation takes place +before the parse starts, so variables set during the parse (e.g. +using SSI directives) will not apply. This flexible configuration +is enabled by the ProxyHTMLInterp directive, or can +be disabled for speed.

+

Flags for ProxyHTMLURLMap

+

Flags are case-sensitive.

+
+
h
+

Ignore HTML links (pass through unchanged)

+
e
+

Ignore scripting events (pass through unchanged)

+
c
+

Pass embedded script and style sections through untouched.

+
L
+

Last-match. If this rule matches, no more rules are applied +(note that this happens automatically for HTML links).

+
l
+
Opposite to L. Overrides the one-change-only default +behaviour with HTML links.
+
R
+

Use Regular Expression matching-and-replace. from-pattern +is a regexp, and to-pattern a replacement string that may be +based on the regexp. Regexp memory is supported: you can use brackets () +in the from-pattern and retrieve the matches with $1 to $9 +in the to-pattern.

+

If R is not set, it will use string-literal search-and-replace, as in +versions 1.x. Logic is starts-with in HTML links, but +contains in scripting events and embedded script and style sections. +

+
+
x
+

Use POSIX extended Regular Expressions. Only applicable with R.

+
i
+

Case-insensitive matching. Only applicable with R.

+
n
+

Disable regexp memory (for speed). Only applicable with R.

+
s
+

Line-based regexp matching. Only applicable with R.

+
^
+

Match at start only. This applies only to string matching +(not regexps) and is irrelevant to HTML links.

+
$
+

Match at end only. This applies only to string matching +(not regexps) and is irrelevant to HTML links.

+
V
+

Interpolate environment variables in to-pattern. +A string of the form ${varname|default} will be replaced by the +value of environment variable varname. If that is unset, it +is replaced by default. The |default is optional.

+

NOTE: interpolation will only be enabled if ProxyHTMLInterp is On.

+
+
v
+

Interpolate environment variables in from-pattern. +Patterns supported are as above.

+

NOTE: interpolation will only be enabled if ProxyHTMLInterp is On.

+
+
+

Conditions for ProxyHTMLURLMap

+

The optional cond argument specifies a condition to +test before the parse. If a condition is unsatisfied, the URLMap +will be ignored in this parse.

+

The condition takes the form [!]var[=val], and is +satisfied if the value of environment variable var +is val. If the optional =val is omitted, +then any value of var satisfies the condition, provided +only it is set to something. If the first character is !, +the condition is reversed.
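+
+A sketch (the variable name and paths are hypothetical, and a flags
+argument is supplied only because the condition is positional):
+
+ProxyHTMLURLMap /private/ /mirror/private/ L intranet_user
+
+This rule would then be applied only when the environment variable
+intranet_user has been set (for example by an earlier SetEnvIf).
+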

+

NOTE: conditions will only be applied if ProxyHTMLInterp is On.

+
+
ProxyHTMLInterp
+
+

Syntax: ProxyHTMLInterp On|Off

+

Enables new (per-request) features of ProxyHTMLURLMap.

+
+
ProxyHTMLDoctype
+
+

Syntax: ProxyHTMLDoctype HTML|XHTML [Legacy]

+

Alternative Syntax: ProxyHTMLDocType fpi [SGML|XML]

+

In the first form, documents will be declared as HTML 4.01 or XHTML 1.0 +according to the option selected. This option also determines whether +HTML or XHTML syntax is used for output. Note that the format of the +documents coming from the backend server is immaterial: the parser will +deal with it automatically. If the optional second argument is set to +"Legacy", documents will be declared "Transitional", an option that may +be necessary if you are proxying pre-1998 content or working with defective +authoring/publishing tools.
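+
+For example, to declare and emit XHTML 1.0 Transitional for a backend
+serving older content:
+
+ProxyHTMLDoctype XHTML Legacy
+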

+

In the second form, it will insert your own FPI. The optional second +argument determines whether SGML/HTML or XML/XHTML syntax will be used.

+

Starting at version 2.0, the default is changed to omitting any FPI, +on the grounds that no FPI is better than a bogus one. If your backend +generates decent HTML or XHTML, set it accordingly.

+

From version 3, if the first form is used, mod_proxy_fdpass +will also clean up the HTML to the specified standard. It cannot +fix every error, but it will strip out bogus elements and attributes. +It will also optionally log other errors at LogLevel Debug.

+
+
ProxyHTMLFixups
+
+

Syntax: ProxyHTMLFixups [lowercase] [dospath] [reset]

+

This directive takes one to three arguments as follows:

+
    +
  • lowercase URLs are rewritten to lowercase
  • +
  • dospath Backslashes in URLs are rewritten to forward slashes.
  • +
  • reset Unset any options set at a higher level in the configuration.
  • +
+

Take care when using these. The fixes will correct certain authoring +mistakes, but risk also erroneously fixing links that were correct to start with. +Only use them if you know you have a broken backend server.
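+
+If, for instance, a backend is known to emit Windows-style, mixed-case
+paths, a setting such as the following could be used (only once you have
+confirmed that breakage):
+
+ProxyHTMLFixups lowercase dospath
+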

+
+
ProxyHTMLMeta
+

Syntax ProxyHTMLMeta [On|Off]

+

Parses <meta http-equiv ...> elements to real HTTP +headers.

+

In version 3, this is also tied in with the improved +internationalisation support, and is +required to support some character encodings.

+
+
ProxyHTMLExtended
+

Syntax ProxyHTMLExtended [On|Off]

+

Set to Off, this gives the same behaviour as 1.x versions +of mod_proxy_fdpass. HTML links are rewritten according to the ProxyHTMLURLMap +directives, but links appearing in Javascript and CSS are ignored.

+

Set to On, all scripting events and embedded scripts or +stylesheets are also processed by the ProxyHTMLURLMap rules, according to +the flags set for each rule. Since this requires more parsing, performance +will be best if you only enable it when strictly necessary.

+
+
ProxyHTMLStripComments
+

Syntax ProxyHTMLStripComments [On|Off]

+

This directive will cause mod_proxy_fdpass to strip HTML comments. +Note that this will also kill off any scripts or styles embedded in +comments (a bogosity introduced in 1995/6 with Netscape 2 for the +benefit of then-older browsers, but still in use today). +It may also interfere with comment-based processors such as SSI or ESI: +be sure to run any of those before mod_proxy_fdpass in the +filter chain if stripping comments!

+
+
ProxyHTMLLogVerbose
+

Syntax ProxyHTMLLogVerbose [On|Off]

+

Turns on verbose logging. This causes mod_proxy_fdpass to make +error log entries (at LogLevel Info) about charset +detection and about all meta substitutions and rewrites made. +When Off, only errors and warnings (if any) are logged.

+
+
ProxyHTMLBufSize
+

Syntax ProxyHTMLBufSize nnnn

+

Set the buffer size increment for buffering inline stylesheets and scripts.

+

In order to parse non-HTML content (stylesheets and scripts), mod_proxy_fdpass +has to read the entire script or stylesheet into a buffer. This buffer will +be expanded as necessary to hold the largest script or stylesheet in a page, +in increments of [nnnn] as set by this directive.

+

The default is 8192, and will work well for almost all pages. However, +if you know you're proxying a lot of pages containing stylesheets and/or +scripts bigger than 8K (that is, for a single script or stylesheet, +NOT in total), it will be more efficient to set a larger buffer +size and avoid the need to resize the buffer dynamically during a request. +
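+
+For example, if you know your pages routinely carry a single inline
+script of around 12K, a larger increment avoids resizing during the
+request:
+
+ProxyHTMLBufSize 16384
+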

+
+
ProxyHTMLEvents
+
+

Syntax ProxyHTMLEvents attr [attr ...]

+

Specifies one or more attributes to treat as scripting events and +apply URLMaps to where appropriate. You can specify any number of +attributes in one or more ProxyHTMLEvents directives. +The sample configuration +defines the events in standard HTML 4 and XHTML 1.
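+
+By way of illustration (an abbreviated list, not the full set shipped in
+the sample configuration):
+
+ProxyHTMLEvents onclick ondblclick onmousedown onmouseup onmouseover onmouseout onkeypress onkeydown onkeyup onload onunload onsubmit onreset
+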

+
+
ProxyHTMLLinks
+
+

Syntax ProxyHTMLLinks elt attr [attr ...]

+

Specifies elements that have URL attributes that should be rewritten +using standard URLMaps as in versions 1 and 2 of mod_proxy_fdpass. +You will need one ProxyHTMLLinks directive per element, +but it can have any number of attributes. The sample configuration +defines the HTML links for standard HTML 4 and XHTML 1.
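+
+By way of illustration (a small subset; the sample configuration covers
+the full HTML 4/XHTML 1 set):
+
+ProxyHTMLLinks a href
+ProxyHTMLLinks img src longdesc usemap
+ProxyHTMLLinks form action
+ProxyHTMLLinks link href
+ProxyHTMLLinks script src
+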

+
+
ProxyHTMLCharsetAlias
+
+

Syntax ProxyHTMLCharsetAlias charset alias [alias ...]

+

This server-wide directive aliases one or more charsets to another +charset. This enables encodings not recognised by libxml2 to be handled +internally by libxml2's charset support using the translation table for +a recognised charset.

+

For example, Latin 1 (ISO-8859-1) is supported by libxml2. +Microsoft's Windows-1252 is almost identical and can be supported +by aliasing it:
+ProxyHTMLCharsetAlias ISO-8859-1 Windows-1252

+
+
ProxyHTMLCharsetDefault
+
+

Syntax ProxyHTMLCharsetDefault name

+

This defines the default encoding to assume when absolutely no charset +information is available from the backend server. The default value for +this is ISO-8859-1, as specified in HTTP/1.0 and assumed in +earlier mod_proxy_fdpass versions.

+
+
ProxyHTMLCharsetOut
+
+

Syntax ProxyHTMLCharsetOut name

+

This selects an encoding for mod_proxy_fdpass output. It should not +normally be used, as any change from the default UTF-8 +(Unicode - as used internally by libxml2) will impose an additional +processing overhead. The special token ProxyHTMLCharsetOut * +will generate output using the same encoding as the input.

+
+
ProxyHTMLStartParse
+
+

Syntax ProxyHTMLStartParse element [elt*]

+

Specify that the HTML parser should start at the first instance +of any of the elements specified. This can be used where a broken +backend inserts leading junk that messes up the parser (example here).
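+
+For example, to make the parser skip anything a broken backend emits
+before the opening html element:
+
+ProxyHTMLStartParse html
+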

+
+
+

Other Configuration

+

Normally, mod_proxy_fdpass will refuse to run when not +in a proxy or when the contents are not HTML. This can be overridden +(at your own risk) by setting the environment variable +PROXY_HTML_FORCE (e.g. with the SetEnv directive).
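+
+For example (the value is arbitrary; the variable only needs to be set):
+
+SetEnv PROXY_HTML_FORCE 1
+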

+
+ diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..5f95369 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,5 @@ +mod-proxy-fdpass (0.1.0-1) unstable; urgency=low + + * First backport of fdpass module + + -- Matteo Nastasi Mon, 25 Jun 2012 14:55:25 +0200 diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000..b8626c4 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +4 diff --git a/debian/conf/proxy_fdpass.load b/debian/conf/proxy_fdpass.load new file mode 100644 index 0000000..b27bcdb --- /dev/null +++ b/debian/conf/proxy_fdpass.load @@ -0,0 +1,2 @@ +# Depends: proxy +LoadModule proxy_fdpass_module /usr/lib/apache2/modules/mod_proxy_fdpass.so diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..8ed91bf --- /dev/null +++ b/debian/control @@ -0,0 +1,15 @@ +Source: mod-proxy-fdpass +Section: web +Priority: optional +Maintainer: Emmanuel Lacour +Build-Depends: debhelper (>= 4.0.0), apache2-prefork-dev (>> 2.2), libxml2-dev (>> 2.5.10) +Standards-Version: 0.1.0 + +Package: libapache2-mod-proxy-fdpass +Architecture: any +Depends: ${shlibs:Depends}, apache2, apache2.2-common, libxml2 (>> 2.5.10) +Description: Apache2 filter module for HTML links rewriting + mod_proxy_fdpass is an output filter to rewrite HTML links in a proxy + situation, to ensure that links work for users outside the proxy. It + serves the same purpose as Apache's ProxyPassReverse directive does for + HTTP headers, and is an essential component of a reverse proxy. diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..a172cdb --- /dev/null +++ b/debian/copyright @@ -0,0 +1,26 @@ +This package was debianized by Emmanuel Lacour on +Sun, 24 Oct 2004 06:17:25 +0200. + +It was downloaded from http://apache.webthing.com/mod_proxy_fdpass/ + +Upstream Author: Nick Kew + +Copyright (c) 2003-7, WebThing Ltd + +License: + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + +see /usr/share/common-licenses/GPL for all details. diff --git a/debian/dirs b/debian/dirs new file mode 100644 index 0000000..59d28be --- /dev/null +++ b/debian/dirs @@ -0,0 +1,2 @@ +/usr/lib/apache2/modules +/etc/apache2/mods-available diff --git a/debian/docs b/debian/docs new file mode 100644 index 0000000..37ac420 --- /dev/null +++ b/debian/docs @@ -0,0 +1,3 @@ +config.html +guide.html +faq.html diff --git a/debian/install b/debian/install new file mode 100644 index 0000000..0cb76a2 --- /dev/null +++ b/debian/install @@ -0,0 +1,3 @@ +debian/conf/proxy_fdpass.load /etc/apache2/mods-available/ +proxy_fdpass.conf /etc/apache2/mods-available/ +.libs/mod_proxy_fdpass.so /usr/lib/apache2/modules/ diff --git a/debian/postinst b/debian/postinst new file mode 100644 index 0000000..e3cf6f7 --- /dev/null +++ b/debian/postinst @@ -0,0 +1,68 @@ +#! 
/bin/sh +# postinst script for libapache2-mod-proxy-fdpass +# +# see: dh_installdeb(1) + +set -e + +# summary of how this script can be called: +# * `configure' +# * `abort-upgrade' +# * `abort-remove' `in-favour' +# +# * `abort-deconfigure' `in-favour' +# `removing' +# +# for details, see http://www.debian.org/doc/debian-policy/ or +# the debian-policy package +# + +reload_apache() +{ + if apache2ctl configtest 2>/dev/null; then + invoke-rc.d apache2 force-reload || true + else + echo "Your apache2 configuration is broken, so we're not restarting it for you." + fi +} + + +case "$1" in + configure) + # Reload the module on upgrade if enabled + if [ -n "$2" ]; then + if [ -e /etc/apache2/mods-enabled/proxy_fdpass.load ]; then + # We must reenable this module to enable the new configuration file + if dpkg --compare-versions "$2" lt "3.0.0-1" ; then + a2dismod proxy_fdpass >/dev/null || true + a2enmod proxy_fdpass >/dev/null || true + fi + reload_apache + fi + else + # Enable the module + if [ -e /etc/apache2/apache2.conf ]; then + a2enmod proxy_fdpass >/dev/null || true + reload_apache + fi + fi + ;; + + abort-upgrade|abort-remove|abort-deconfigure) + + ;; + + *) + echo "postinst called with unknown argument \`$1'" >&2 + exit 1 + ;; +esac + +# dh_installdeb will replace this with shell code automatically +# generated by other debhelper scripts. + +#DEBHELPER# + +exit 0 + + diff --git a/debian/prerm b/debian/prerm new file mode 100644 index 0000000..9f64884 --- /dev/null +++ b/debian/prerm @@ -0,0 +1,51 @@ +#! /bin/sh +# prerm script for libapache2-mod-proxy-fdpass +# +# see: dh_installdeb(1) + +set -e + +# summary of how this script can be called: +# * `remove' +# * `upgrade' +# * `failed-upgrade' +# * `remove' `in-favour' +# * `deconfigure' `in-favour' +# `removing' +# +# for details, see http://www.debian.org/doc/debian-policy/ or +# the debian-policy package + +reload_apache() +{ + if apache2ctl configtest 2>/dev/null; then + invoke-rc.d apache2 force-reload || true + else + echo "Your apache2 configuration is broken, so we're not restarting it for you." + fi +} + + +case "$1" in + remove) + if [ -e /etc/apache2/mods-enabled/proxy_fdpass.load ]; then + a2dismod proxy_fdpass >/dev/null || true + reload_apache + fi + ;; + upgrade|failed-upgrade|deconfigure) + ;; + *) + echo "prerm called with unknown argument \`$1'" >&2 + exit 1 + ;; +esac + +# dh_installdeb will replace this with shell code automatically +# generated by other debhelper scripts. + +#DEBHELPER# + +exit 0 + + diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000..b0f820e --- /dev/null +++ b/debian/rules @@ -0,0 +1,52 @@ +#!/usr/bin/make -f +# -*- makefile -*- +# Sample debian/rules that uses debhelper. +# This file was originally written by Joey Hess and Craig Small. +# As a special exception, when this file is copied by dh-make into a +# dh-make output file, you may use that output file without restriction. +# This special exception was added by Craig Small in version 0.37 of dh-make. + +# Uncomment this to turn on verbose mode. 
+#export DH_VERBOSE=1 + +build: build-stamp + +build-stamp: + dh_testdir + apxs2 -c -I/usr/include/libxml2 mod_proxy_fdpass.c + touch build-stamp + +clean: + dh_testdir + dh_testroot + rm -f build-stamp mod_proxy_fdpass.la mod_proxy_fdpass.lo mod_proxy_fdpass.o mod_proxy_fdpass.slo + rm -rf .libs + dh_clean + +install: build + dh_testdir + dh_testroot + dh_clean -k + dh_installdirs + dh_install + +binary-indep: + +binary-arch: build install + dh_testdir + dh_testroot + dh_installchangelogs + dh_installdocs + dh_link + dh_strip + dh_compress + dh_fixperms + dh_makeshlibs + dh_installdeb + dh_shlibdeps + dh_gencontrol + dh_md5sums + dh_builddeb + +binary: binary-arch +.PHONY: build clean binary-arch binary-indep binary install diff --git a/faq.html b/faq.html new file mode 100644 index 0000000..16d2044 --- /dev/null +++ b/faq.html @@ -0,0 +1,74 @@ + + + +mod_proxy_fdpass + + +
+

mod_proxy_fdpass: Frequently Asked Questions

+

This answers some of the most frequently asked questions +that aren't dealt with (or that people overlook) in the documentation +and the apachetutor tutorial. This was written for +Version 2, and most of the questions are moot in Version 3.

+

Questions

+
    +
  1. Can mod_proxy_fdpass support (charset XYZ) as input?
  2. +
  3. Can mod_proxy_fdpass support (charset XYZ) as output?
  4. +
  5. Why does mod_proxy_fdpass mangle my Javascript?
  6. +
  7. Why doesn't mod_proxy_fdpass rewrite urls in [some attribute]?
  8. +
+

Answers

+
+
Can mod_proxy_fdpass support (charset XYZ) as input?
+

That depends entirely on libxml2. mod_proxy_fdpass supports +charset detection, but does not itself support any charsets. +It works by passing the charset detected to libxml2 when it sets +up the parser.

+

This means that mod_proxy_fdpass inherits its charset support +from libxml2, and will always support exactly the same +charsets available in the version of libxml2 you have installed. +So bug the libxml2 folks, not us!

+

In Version 3, charset support is much expanded provided +ProxyHTMLMeta is enabled, and any charset can be supported +by aliasing it with ProxyHTMLCharsetAlias.

+
+
Can mod_proxy_fdpass support (charset XYZ) as output?
+

libxml2 uses utf-8 internally for everything. +Generating output with another charset is therefore an additional +overhead, and the decision was taken to exclude any such capability +from mod_proxy_fdpass. There is an easy workaround: you can transcode +the output using another filter, such as mod_charset_lite.

+

Version 3 supports output transformation to other +charsets using ProxyHTMLCharsetOut.

+
+
Why does mod_proxy_fdpass mangle my Javascript?
+

It doesn't. Your javascript is simply too badly malformed, +and libxml2's error correction isn't what you expect! +Check it with a validator, +or with libxml2's xmllint --html +(which uses the same parser as mod_proxy_fdpass). Here is +a fuller explanation.

+

The best fix for this is to remove the javascript from your markup, +and import it from a separate .js file. If you have an +irredeemably broken publishing system, you may have to upgrade to +mod_publisher or resort to the +non-markup-aware mod_line_edit.

+
+
Why doesn't mod_proxy_fdpass rewrite urls in [some attribute]?
+

mod_proxy_fdpass is based on W3C HTML 4.01 and XHTML 1.0 (which are +identical in terms of elements and attributes). It supports all links +defined in W3C HTML, even those that have been deprecated since 1997. +But it does NOT support proprietary pseudo-HTML "extensions" +that have never been part of any published HTML standard. +Of course, it's trivial to add them to the source.

+

This has been the most commonly requested feature since mod_proxy_fdpass 2.0 +was released in 2004. It cannot reasonably be satisfied, because everyone's +pet "extensions" are different. Version 3 deals with this +by taking all HTML knowledge out of the code and loading it from httpd.conf +instead, so admins can meet their own needs without recompiling.

+
+
+
+ diff --git a/guide.html b/guide.html new file mode 100644 index 0000000..df758e5 --- /dev/null +++ b/guide.html @@ -0,0 +1,230 @@ + + + +Technical guide: mod_proxy_fdpass + + + +
+

mod_proxy_fdpass: Technical Guide

+

mod_proxy_fdpass From Version 2.4 (Sept 2004). +Updates in Version 3 (Dec. 2006) are highlighted.

+

Contents

+ +

URL Rewriting

+

Rewriting URLs into a proxy's address space is of course the primary +purpose of this module. From Version 2.0, this capability has been +extended from rewriting HTML URLs to processing scripts and stylesheets +that may contain URLs.

+

Because the module doesn't contain parsers for javascript or CSS, this +additional processing means we have had to introduce some heuristic parsing. +What that means is that the parser cannot automatically distinguish between +a URL that should be replaced and one that merely appears as text. It's +up to you to match the right things! To help you do this, we have introduced +some new features:

+
    +
  1. The ProxyHTMLExtended directive. The extended processing +will only be activated if this is On. The default is Off, which gives you +the old behaviour.
  2. +
  3. Regular Expression match-and-replace. This can be used anywhere, +but is most useful where context information can help distinguish URLs +that should be replaced and avoid false positives. For example, +to rewrite URLs of CSS @import, we might define a rule
    +ProxyHTMLURLMap url\(http://internal.example.com([^\)]*)\) url(http://proxy.example.com$1) Rihe
    +This explicitly rewrites from one servername to another, and uses regexp +memory to match a path and append it unchanged in $1, while using the +url(...) context to reduce the danger of a match that shouldn't +be rewritten. The R flag invokes regexp processing for this rule; +i makes the match case-insensitive; while h and e +save processing cycles by preventing the match being applied to HTML links +and scripting events, where it is clearly irrelevant.
  4. +
+

HTML Links

+

HTML links are those attributes defined by the HTML 4 and XHTML 1 +DTDs as of type %URI. For example, the href +attribute of the a element. For a full list, see the +declaration of linked_elts in pstartElement. +Rules are applicable provided the h flag is not set. +From Version 3, the definition of links to use is +delegated to the system administrator via the ProxyHTMLLinks +directive.

+

An HTML link always contains exactly one URL. So whenever mod_proxy_fdpass +finds a matching ProxyHTMLURLMap rule, it will apply the +transformation once and stop processing the attribute. This +can be overridden by the l flag, which causes processing +a URL to continue after a rewrite.

+

Scripting Events

+

Scripting events are the contents of event attributes as defined in the +HTML4 and XHTML1 DTDs; for example onclick. For a full list, +see the declaration of events in pstartElement. +Rules are applicable provided the e flag is not set. +From Version 3, the definition of events to use is +delegated to the system administrator via the ProxyHTMLEvents +directive.

+

A scripting event may contain more than one URL, and will contain other +text. So when ProxyHTMLExtended is On, all applicable rules +will be applied in order until and unless a rule with the L flag +matches. A rule may match more than once, provided the matches do not +overlap, so a URL/pattern that appears more than once is rewritten +every time it matches.

+

Embedded Scripts and Stylesheets

+

Embedded scripts and stylesheets are the contents of +<script> and <style> elements. +Rules are applicable provided the c flag is not set.

+

A script or stylesheet may contain more than one URL, and will contain other +text. So when ProxyHTMLExtended is On, all applicable rules +will be applied in order until and unless a rule with the L flag +matches. A rule may match more than once, provided the matches do not +overlap, so a URL/pattern that appears more than once is rewritten +every time it matches.

+

Output Transformation

+

mod_proxy_fdpass uses a SAX parser. This means that the input stream +- and hence the output generated - will be normalised in various ways, +even where nothing is actually rewritten. To an HTML or XML parser, +the document is not changed by normalisation, except as noted below. +Exceptions to this may arise where the input stream is malformed, when +the output of mod_proxy_fdpass may be undefined. These should of course +be fixed at the backend: if mod_proxy_fdpass doesn't work as expected, +then neither will browsers in real life, except by coincidence.

+

FPI (Doctype)

+

Strictly speaking, HTML and XHTML documents are required to have a +Formal Public Identifier (FPI), also known as a Document Type Declaration. +This references a Document Type Definition (DTD) which defines the grammar/ +syntax to which the contents of the document must conform.

+

The parser in mod_proxy_fdpass loses any FPI in the input document, but +gives you the option to insert one. You may select either HTML or XHTML +(see below), and if your backend is sloppy you may also want to use the +"Legacy" keyword to make it declare documents "Transitional". You may +also declare a custom DTD, or (if your backend is seriously screwed +so no DTD would be appropriate) omit it altogether.

+

HTML vs XHTML

+

The differences between HTML 4.01 and XHTML 1.0 are essentially negligible, +and mod_proxy_fdpass can transform between the two. You can safely select +either, regardless of what the backend generates, and mod_proxy_fdpass will +apply the appropriate rules in generating output. HTML saves a few bytes.

+

If you declare a custom DTD, you should specify whether to generate +HTML or XHTML syntax in the output. This affects empty elements: +HTML <br> vs XHTML <br />.

+

If you select standard HTML or XHTML, mod_proxy_fdpass 3 will +perform some additional fixups of bogus markup. If you don't want this, +you can enter a standard DTD using the nonstandard form of +ProxyHTMLDoctype, which will then be treated as unknown +(no corrections).

+

Character Encoding

+

The parser uses UTF-8 (Unicode) internally, and +mod_proxy_fdpass prior to version 3 always generates output as UTF-8. +This is supported by all general-purpose web software, and supports more +character sets and languages than any other charset. +Version 3 supports, but does not recommend, other output encodings, using +the ProxyHTMLCharsetOut directive.

+

The character encoding should be declared in HTTP: for example
+Content-Type: text/html; charset=latin1
mod_proxy_fdpass has always supported this in its input, and ensured +this happens in output. But prior to version 2, it did not fully +support detecting (sniffing) the charset when a backend fails to +set the HTTP header.

+

From version 2.0, mod_proxy_fdpass will detect the encoding of its input +as follows:

+
    +
  1. The HTTP headers, where available, always take precedence over other +information.
  2. +
  3. If the first 2-4 bytes are an XML Byte Order Mark (BOM), this is used.
  4. +
  5. If the document starts with an XML declaration +<?xml .... ?>, this determines encoding by XML rules.
  6. +
  7. If the document contains the HTML hack +<meta http-equiv="Content-Type" ...>, any charset declared +here is used.
  8. +
  9. In the absence of any of the above indications, the HTML-over-HTTP default +encoding ISO-8859-1 or the +ProxyHTMLCharsetDefault value is assumed.
  10. +
  11. The parser is set to ignore invalid characters, so a malformed input +stream will generate glitches (unexpected characters) rather than risk +aborting a parse altogether.
  12. +
+

In version 3.0, this remains the default, but +internationalisation support is further improved, and is no longer +limited to the encodings supported by libxml2:

+
    +
  • The ProxyHTMLCharsetAlias directive enables server +administrators to support additional encodings by aliasing them to +something supported by libxml2.
  • +
  • When a charset that is neither directly supported nor aliased is +encountered, mod_proxy_fdpass 3 will attempt to support it using Apache/APR's +charset conversion support in apr_xlate, which on most platforms +is a wrapper for the leading conversion utility iconv. +Because of undocumented behaviour of libxml2, this may cause problems +when charset is specified in an HTML META element. This +feature is therefore only enabled when ProxyHTMLMeta is On.
  • +
+ +

meta http-equiv support

+

The HTML meta element includes a form +<meta http-equiv="Some-Header" content="some-value"> +which should notionally be converted to a real HTTP header by the webserver. +In practice, it is more commonly supported in browsers than servers, and +is common in constructs such as ClientPull (aka "meta refresh"). +The ProxyHTMLMeta directive supports the server generating +real HTTP headers from these. However, it does not strip them from the +HTML (except for Content-Type, which is removed in case it contains +conflicting charset information).

+

Other Fixups

+

For additional minor functions of mod_proxy_fdpass, please see the +ProxyHTMLFixups and ProxyHTMLStripComments +directives in the Configuration Guide.

+

Debugging your Configuration

+

From Version 2.1, mod_proxy_fdpass supports a ProxyHTMLLogVerbose +directive, to enable verbose logging at LogLevel Info. This +is designed to help with setting up your proxy configuration and +diagnosing unexpected behaviour; it is not recommended for normal +operation, and can be disabled altogether at compile time for extra +performance (see the top of the source).

+

When verbose logging is enabled, the following messages will be logged:

+
    +
  1. In Charset Detection, it will report what charset is +detected and how (HTTP rules, XML rules, or HTML rules). Note that, +regardless of verbose logging, an error or warning will be logged if an +unsupported charset is detected or if no information can be found.
  2. +
  3. When ProxyHTMLMeta is enabled, it logs each header/value +pair processed.
  4. +
  5. Whenever a ProxyHTMLURLMap rule matches and causes a +rewrite, it is logged. The message contains abbreviated context information: +H denotes an HTML link matched; E +denotes a match in a scripting event, C denotes a match +in an inline script or stylesheet. When the match is a regexp +find-and-replace, it is also marked as RX.
  6. +
+

Workarounds for Browser Bugs

+

Because mod_proxy_fdpass unsets the Content-Length header, it risks +losing the performance advantage of HTTP Keep-Alive. It therefore sets +up HTTP Chunked Encoding when responding to HTTP/1.1 requests. This +enables keep-alive again for HTTP/1.1 agents.

+

Unfortunately some buggy agents will send an HTTP/1.1 request but +choke on an HTTP/1.1 response. Typically you will see numbers before +and after, and possibly in the middle of, a page. To work around this, set the +force-response-1.0 environment variable in httpd.conf. +For example,
BrowserMatch MSIE force-response-1.0

+
+ diff --git a/mod_proxy_fdpass.c b/mod_proxy_fdpass.c new file mode 100644 index 0000000..01d3167 --- /dev/null +++ b/mod_proxy_fdpass.c @@ -0,0 +1,2463 @@ +#ifdef MOP_HERE_ONLY_FOR_EXAMPLE +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* HTTP routines for Apache proxy */ +#include "mod_proxy.h" +#include "ap_regex.h" + +module AP_MODULE_DECLARE_DATA proxy_http_module; + +static apr_status_t ap_proxy_http_cleanup(const char *scheme, + request_rec *r, + proxy_conn_rec *backend); + +/* + * Canonicalise http-like URLs. + * scheme is the scheme for the URL + * url is the URL starting with the first '/' + * def_port is the default port for this scheme. + */ +static int proxy_http_canon(request_rec *r, char *url) +{ + char *host, *path, sport[7]; + char *search = NULL; + const char *err; + const char *scheme; + apr_port_t port, def_port; + + { + int mop_fd; + int mop_bf[512]; + + mop_fd = open("/tmp/apache_mop.log", O_WRONLY | O_APPEND); + sprintf(mop_bf, "proxy_http_canon: start\n"); + write(mop_fd, mop_bf, strlen(mop_bf)); + close(mop_fd); + + } + /* ap_port_of_scheme() */ + if (strncasecmp(url, "http:", 5) == 0) { + url += 5; + scheme = "http"; + } + else if (strncasecmp(url, "https:", 6) == 0) { + url += 6; + scheme = "https"; + } + else { + return DECLINED; + } + def_port = apr_uri_port_of_scheme(scheme); + + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "proxy: HTTP: canonicalising URL %s", url); + + /* do syntatic check. + * We break the URL into host, port, path, search + */ + port = def_port; + err = ap_proxy_canon_netloc(r->pool, &url, NULL, NULL, &host, &port); + if (err) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, + "error parsing URL %s: %s", + url, err); + return HTTP_BAD_REQUEST; + } + + /* + * now parse path/search args, according to rfc1738: + * process the path. + * + * In a reverse proxy, our URL has been processed, so canonicalise + * unless proxy-nocanon is set to say it's raw + * In a forward proxy, we have and MUST NOT MANGLE the original. + */ + switch (r->proxyreq) { + default: /* wtf are we doing here? */ + case PROXYREQ_REVERSE: + if (apr_table_get(r->notes, "proxy-nocanon")) { + path = url; /* this is the raw path */ + } + else { + path = ap_proxy_canonenc(r->pool, url, strlen(url), + enc_path, 0, r->proxyreq); + search = r->args; + } + break; + case PROXYREQ_PROXY: + path = url; + break; + } + + if (path == NULL) + return HTTP_BAD_REQUEST; + + if (port != def_port) + apr_snprintf(sport, sizeof(sport), ":%d", port); + else + sport[0] = '\0'; + + if (ap_strchr_c(host, ':')) { /* if literal IPv6 address */ + host = apr_pstrcat(r->pool, "[", host, "]", NULL); + } + r->filename = apr_pstrcat(r->pool, "proxy:", scheme, "://", host, sport, + "/", path, (search) ? "?" : "", (search) ? 
search : "", NULL); + return OK; +} + +/* Clear all connection-based headers from the incoming headers table */ +typedef struct header_dptr { + apr_pool_t *pool; + apr_table_t *table; + apr_time_t time; +} header_dptr; +static ap_regex_t *warn_rx; +static int clean_warning_headers(void *data, const char *key, const char *val) +{ + apr_table_t *headers = ((header_dptr*)data)->table; + apr_pool_t *pool = ((header_dptr*)data)->pool; + char *warning; + char *date; + apr_time_t warn_time; + const int nmatch = 3; + ap_regmatch_t pmatch[3]; + + if (headers == NULL) { + ((header_dptr*)data)->table = headers = apr_table_make(pool, 2); + } +/* + * Parse this, suckers! + * + * Warning = "Warning" ":" 1#warning-value + * + * warning-value = warn-code SP warn-agent SP warn-text + * [SP warn-date] + * + * warn-code = 3DIGIT + * warn-agent = ( host [ ":" port ] ) | pseudonym + * ; the name or pseudonym of the server adding + * ; the Warning header, for use in debugging + * warn-text = quoted-string + * warn-date = <"> HTTP-date <"> + * + * Buggrit, use a bloomin' regexp! + * (\d{3}\s+\S+\s+\".*?\"(\s+\"(.*?)\")?) --> whole in $1, date in $3 + */ + while (!ap_regexec(warn_rx, val, nmatch, pmatch, 0)) { + warning = apr_pstrndup(pool, val+pmatch[0].rm_so, + pmatch[0].rm_eo - pmatch[0].rm_so); + warn_time = 0; + if (pmatch[2].rm_eo > pmatch[2].rm_so) { + /* OK, we have a date here */ + date = apr_pstrndup(pool, val+pmatch[2].rm_so, + pmatch[2].rm_eo - pmatch[2].rm_so); + warn_time = apr_date_parse_http(date); + } + if (!warn_time || (warn_time == ((header_dptr*)data)->time)) { + apr_table_addn(headers, key, warning); + } + val += pmatch[0].rm_eo; + } + return 1; +} +static apr_table_t *ap_proxy_clean_warnings(apr_pool_t *p, apr_table_t *headers) +{ + header_dptr x; + x.pool = p; + x.table = NULL; + x.time = apr_date_parse_http(apr_table_get(headers, "Date")); + apr_table_do(clean_warning_headers, &x, headers, "Warning", NULL); + if (x.table != NULL) { + apr_table_unset(headers, "Warning"); + return apr_table_overlay(p, headers, x.table); + } + else { + return headers; + } +} +static int clear_conn_headers(void *data, const char *key, const char *val) +{ + apr_table_t *headers = ((header_dptr*)data)->table; + apr_pool_t *pool = ((header_dptr*)data)->pool; + const char *name; + char *next = apr_pstrdup(pool, val); + while (*next) { + name = next; + while (*next && !apr_isspace(*next) && (*next != ',')) { + ++next; + } + while (*next && (apr_isspace(*next) || (*next == ','))) { + *next++ = '\0'; + } + apr_table_unset(headers, name); + } + return 1; +} +static void ap_proxy_clear_connection(apr_pool_t *p, apr_table_t *headers) +{ + header_dptr x; + x.pool = p; + x.table = headers; + apr_table_unset(headers, "Proxy-Connection"); + apr_table_do(clear_conn_headers, &x, headers, "Connection", NULL); + apr_table_unset(headers, "Connection"); +} +static void add_te_chunked(apr_pool_t *p, + apr_bucket_alloc_t *bucket_alloc, + apr_bucket_brigade *header_brigade) +{ + apr_bucket *e; + char *buf; + const char te_hdr[] = "Transfer-Encoding: chunked" CRLF; + + buf = apr_pmemdup(p, te_hdr, sizeof(te_hdr)-1); + ap_xlate_proto_to_ascii(buf, sizeof(te_hdr)-1); + + e = apr_bucket_pool_create(buf, sizeof(te_hdr)-1, p, bucket_alloc); + APR_BRIGADE_INSERT_TAIL(header_brigade, e); +} + +static void add_cl(apr_pool_t *p, + apr_bucket_alloc_t *bucket_alloc, + apr_bucket_brigade *header_brigade, + const char *cl_val) +{ + apr_bucket *e; + char *buf; + + buf = apr_pstrcat(p, "Content-Length: ", + cl_val, + CRLF, + NULL); + 
ap_xlate_proto_to_ascii(buf, strlen(buf)); + e = apr_bucket_pool_create(buf, strlen(buf), p, bucket_alloc); + APR_BRIGADE_INSERT_TAIL(header_brigade, e); +} + +#define ASCII_CRLF "\015\012" +#define ASCII_ZERO "\060" + +static void terminate_headers(apr_bucket_alloc_t *bucket_alloc, + apr_bucket_brigade *header_brigade) +{ + apr_bucket *e; + + /* add empty line at the end of the headers */ + e = apr_bucket_immortal_create(ASCII_CRLF, 2, bucket_alloc); + APR_BRIGADE_INSERT_TAIL(header_brigade, e); +} + +static int pass_brigade(apr_bucket_alloc_t *bucket_alloc, + request_rec *r, proxy_conn_rec *conn, + conn_rec *origin, apr_bucket_brigade *bb, + int flush) +{ + apr_status_t status; + apr_off_t transferred; + + if (flush) { + apr_bucket *e = apr_bucket_flush_create(bucket_alloc); + APR_BRIGADE_INSERT_TAIL(bb, e); + } + apr_brigade_length(bb, 0, &transferred); + if (transferred != -1) + conn->worker->s->transferred += transferred; + status = ap_pass_brigade(origin->output_filters, bb); + if (status != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, status, r->server, + "proxy: pass request body failed to %pI (%s)", + conn->addr, conn->hostname); + if (origin->aborted) { + return APR_STATUS_IS_TIMEUP(status) ? HTTP_GATEWAY_TIME_OUT : HTTP_BAD_GATEWAY; + } + else { + return HTTP_BAD_REQUEST; + } + } + apr_brigade_cleanup(bb); + return OK; +} + +#define MAX_MEM_SPOOL 16384 + +static int stream_reqbody_chunked(apr_pool_t *p, + request_rec *r, + proxy_conn_rec *p_conn, + conn_rec *origin, + apr_bucket_brigade *header_brigade, + apr_bucket_brigade *input_brigade) +{ + int seen_eos = 0, rv = OK; + apr_size_t hdr_len; + apr_off_t bytes; + apr_status_t status; + apr_bucket_alloc_t *bucket_alloc = r->connection->bucket_alloc; + apr_bucket_brigade *bb; + apr_bucket *e; + + add_te_chunked(p, bucket_alloc, header_brigade); + terminate_headers(bucket_alloc, header_brigade); + + while (!APR_BUCKET_IS_EOS(APR_BRIGADE_FIRST(input_brigade))) + { + char chunk_hdr[20]; /* must be here due to transient bucket. */ + + /* If this brigade contains EOS, either stop or remove it. */ + if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(input_brigade))) { + seen_eos = 1; + + /* We can't pass this EOS to the output_filters. */ + e = APR_BRIGADE_LAST(input_brigade); + apr_bucket_delete(e); + } + + apr_brigade_length(input_brigade, 1, &bytes); + + hdr_len = apr_snprintf(chunk_hdr, sizeof(chunk_hdr), + "%" APR_UINT64_T_HEX_FMT CRLF, + (apr_uint64_t)bytes); + + ap_xlate_proto_to_ascii(chunk_hdr, hdr_len); + e = apr_bucket_transient_create(chunk_hdr, hdr_len, + bucket_alloc); + APR_BRIGADE_INSERT_HEAD(input_brigade, e); + + /* + * Append the end-of-chunk CRLF + */ + e = apr_bucket_immortal_create(ASCII_CRLF, 2, bucket_alloc); + APR_BRIGADE_INSERT_TAIL(input_brigade, e); + + if (header_brigade) { + /* we never sent the header brigade, so go ahead and + * take care of that now + */ + bb = header_brigade; + + /* + * Save input_brigade in bb brigade. (At least) in the SSL case + * input_brigade contains transient buckets whose data would get + * overwritten during the next call of ap_get_brigade in the loop. + * ap_save_brigade ensures these buckets to be set aside. + * Calling ap_save_brigade with NULL as filter is OK, because + * bb brigade already has been created and does not need to get + * created by ap_save_brigade. 
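+ *
+ * For reference (illustrative, not part of the original comment): each
+ * pass through this loop emits one chunk to the backend, e.g. for a
+ * 26-byte brigade
+ *   1a\r\n <26 body bytes> \r\n
+ * and the terminating "0\r\n\r\n" chunk is appended after the loop.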
+ */ + status = ap_save_brigade(NULL, &bb, &input_brigade, p); + if (status != APR_SUCCESS) { + return HTTP_INTERNAL_SERVER_ERROR; + } + + header_brigade = NULL; + } + else { + bb = input_brigade; + } + + /* The request is flushed below this loop with chunk EOS header */ + rv = pass_brigade(bucket_alloc, r, p_conn, origin, bb, 0); + if (rv != OK) { + return rv; + } + + if (seen_eos) { + break; + } + + status = ap_get_brigade(r->input_filters, input_brigade, + AP_MODE_READBYTES, APR_BLOCK_READ, + HUGE_STRING_LEN); + + if (status != APR_SUCCESS) { + return HTTP_BAD_REQUEST; + } + } + + if (header_brigade) { + /* we never sent the header brigade because there was no request body; + * send it now + */ + bb = header_brigade; + } + else { + if (!APR_BRIGADE_EMPTY(input_brigade)) { + /* input brigade still has an EOS which we can't pass to the output_filters. */ + e = APR_BRIGADE_LAST(input_brigade); + AP_DEBUG_ASSERT(APR_BUCKET_IS_EOS(e)); + apr_bucket_delete(e); + } + bb = input_brigade; + } + + e = apr_bucket_immortal_create(ASCII_ZERO ASCII_CRLF + /* */ + ASCII_CRLF, + 5, bucket_alloc); + APR_BRIGADE_INSERT_TAIL(bb, e); + + /* Now we have headers-only, or the chunk EOS mark; flush it */ + rv = pass_brigade(bucket_alloc, r, p_conn, origin, bb, 1); + return rv; +} + +static int stream_reqbody_cl(apr_pool_t *p, + request_rec *r, + proxy_conn_rec *p_conn, + conn_rec *origin, + apr_bucket_brigade *header_brigade, + apr_bucket_brigade *input_brigade, + const char *old_cl_val) +{ + int seen_eos = 0, rv = 0; + apr_status_t status = APR_SUCCESS; + apr_bucket_alloc_t *bucket_alloc = r->connection->bucket_alloc; + apr_bucket_brigade *bb; + apr_bucket *e; + apr_off_t cl_val = 0; + apr_off_t bytes; + apr_off_t bytes_streamed = 0; + + if (old_cl_val) { + char *endstr; + + add_cl(p, bucket_alloc, header_brigade, old_cl_val); + status = apr_strtoff(&cl_val, old_cl_val, &endstr, 10); + + if (status || *endstr || endstr == old_cl_val || cl_val < 0) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, + "proxy: could not parse request Content-Length (%s)", + old_cl_val); + return HTTP_BAD_REQUEST; + } + } + terminate_headers(bucket_alloc, header_brigade); + + while (!APR_BUCKET_IS_EOS(APR_BRIGADE_FIRST(input_brigade))) + { + apr_brigade_length(input_brigade, 1, &bytes); + bytes_streamed += bytes; + + /* If this brigade contains EOS, either stop or remove it. */ + if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(input_brigade))) { + seen_eos = 1; + + /* We can't pass this EOS to the output_filters. */ + e = APR_BRIGADE_LAST(input_brigade); + apr_bucket_delete(e); + } + + /* C-L < bytes streamed?!? + * We will error out after the body is completely + * consumed, but we can't stream more bytes at the + * back end since they would in part be interpreted + * as another request! If nothing is sent, then + * just send nothing. + * + * Prevents HTTP Response Splitting. + */ + if (bytes_streamed > cl_val) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, + "proxy: read more bytes of request body than expected " + "(got %" APR_OFF_T_FMT ", expected %" APR_OFF_T_FMT ")", + bytes_streamed, cl_val); + return HTTP_INTERNAL_SERVER_ERROR; + } + + if (header_brigade) { + /* we never sent the header brigade, so go ahead and + * take care of that now + */ + bb = header_brigade; + + /* + * Save input_brigade in bb brigade. (At least) in the SSL case + * input_brigade contains transient buckets whose data would get + * overwritten during the next call of ap_get_brigade in the loop. + * ap_save_brigade ensures these buckets to be set aside. 
+ * Calling ap_save_brigade with NULL as filter is OK, because + * bb brigade already has been created and does not need to get + * created by ap_save_brigade. + */ + status = ap_save_brigade(NULL, &bb, &input_brigade, p); + if (status != APR_SUCCESS) { + return HTTP_INTERNAL_SERVER_ERROR; + } + + header_brigade = NULL; + } + else { + bb = input_brigade; + } + + /* Once we hit EOS, we are ready to flush. */ + rv = pass_brigade(bucket_alloc, r, p_conn, origin, bb, seen_eos); + if (rv != OK) { + return rv ; + } + + if (seen_eos) { + break; + } + + status = ap_get_brigade(r->input_filters, input_brigade, + AP_MODE_READBYTES, APR_BLOCK_READ, + HUGE_STRING_LEN); + + if (status != APR_SUCCESS) { + return HTTP_BAD_REQUEST; + } + } + + if (bytes_streamed != cl_val) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "proxy: client %s given Content-Length did not match" + " number of body bytes read", r->connection->remote_ip); + return HTTP_BAD_REQUEST; + } + + if (header_brigade) { + /* we never sent the header brigade since there was no request + * body; send it now with the flush flag + */ + bb = header_brigade; + return(pass_brigade(bucket_alloc, r, p_conn, origin, bb, 1)); + } + + return OK; +} + +static int spool_reqbody_cl(apr_pool_t *p, + request_rec *r, + proxy_conn_rec *p_conn, + conn_rec *origin, + apr_bucket_brigade *header_brigade, + apr_bucket_brigade *input_brigade, + int force_cl) +{ + int seen_eos = 0; + apr_status_t status; + apr_bucket_alloc_t *bucket_alloc = r->connection->bucket_alloc; + apr_bucket_brigade *body_brigade; + apr_bucket *e; + apr_off_t bytes, bytes_spooled = 0, fsize = 0; + apr_file_t *tmpfile = NULL; + + body_brigade = apr_brigade_create(p, bucket_alloc); + + while (!APR_BUCKET_IS_EOS(APR_BRIGADE_FIRST(input_brigade))) + { + /* If this brigade contains EOS, either stop or remove it. */ + if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(input_brigade))) { + seen_eos = 1; + + /* We can't pass this EOS to the output_filters. 
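+ * The EOS bucket is only a sentinel for this read loop; the spooled
+ * body is framed for the backend by the Content-Length header that is
+ * added once spooling has finished.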
*/ + e = APR_BRIGADE_LAST(input_brigade); + apr_bucket_delete(e); + } + + apr_brigade_length(input_brigade, 1, &bytes); + + if (bytes_spooled + bytes > MAX_MEM_SPOOL) { + /* can't spool any more in memory; write latest brigade to disk */ + if (tmpfile == NULL) { + const char *temp_dir; + char *template; + + status = apr_temp_dir_get(&temp_dir, p); + if (status != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, status, r->server, + "proxy: search for temporary directory failed"); + return HTTP_INTERNAL_SERVER_ERROR; + } + apr_filepath_merge(&template, temp_dir, + "modproxy.tmp.XXXXXX", + APR_FILEPATH_NATIVE, p); + status = apr_file_mktemp(&tmpfile, template, 0, p); + if (status != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, status, r->server, + "proxy: creation of temporary file in directory %s failed", + temp_dir); + return HTTP_INTERNAL_SERVER_ERROR; + } + } + for (e = APR_BRIGADE_FIRST(input_brigade); + e != APR_BRIGADE_SENTINEL(input_brigade); + e = APR_BUCKET_NEXT(e)) { + const char *data; + apr_size_t bytes_read, bytes_written; + + apr_bucket_read(e, &data, &bytes_read, APR_BLOCK_READ); + status = apr_file_write_full(tmpfile, data, bytes_read, &bytes_written); + if (status != APR_SUCCESS) { + const char *tmpfile_name; + + if (apr_file_name_get(&tmpfile_name, tmpfile) != APR_SUCCESS) { + tmpfile_name = "(unknown)"; + } + ap_log_error(APLOG_MARK, APLOG_ERR, status, r->server, + "proxy: write to temporary file %s failed", + tmpfile_name); + return HTTP_INTERNAL_SERVER_ERROR; + } + AP_DEBUG_ASSERT(bytes_read == bytes_written); + fsize += bytes_written; + } + apr_brigade_cleanup(input_brigade); + } + else { + + /* + * Save input_brigade in body_brigade. (At least) in the SSL case + * input_brigade contains transient buckets whose data would get + * overwritten during the next call of ap_get_brigade in the loop. + * ap_save_brigade ensures these buckets to be set aside. + * Calling ap_save_brigade with NULL as filter is OK, because + * body_brigade already has been created and does not need to get + * created by ap_save_brigade. 
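+ * (This branch only keeps data in memory while bytes_spooled stays at
+ * or below MAX_MEM_SPOOL; anything beyond that limit has already been
+ * written to the temporary file above.)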
+ */ + status = ap_save_brigade(NULL, &body_brigade, &input_brigade, p); + if (status != APR_SUCCESS) { + return HTTP_INTERNAL_SERVER_ERROR; + } + + } + + bytes_spooled += bytes; + + if (seen_eos) { + break; + } + + status = ap_get_brigade(r->input_filters, input_brigade, + AP_MODE_READBYTES, APR_BLOCK_READ, + HUGE_STRING_LEN); + + if (status != APR_SUCCESS) { + return HTTP_BAD_REQUEST; + } + } + + if (bytes_spooled || force_cl) { + add_cl(p, bucket_alloc, header_brigade, apr_off_t_toa(p, bytes_spooled)); + } + terminate_headers(bucket_alloc, header_brigade); + APR_BRIGADE_CONCAT(header_brigade, body_brigade); + if (tmpfile) { + /* For platforms where the size of the file may be larger than + * that which can be stored in a single bucket (where the + * length field is an apr_size_t), split it into several + * buckets: */ + if (sizeof(apr_off_t) > sizeof(apr_size_t) + && fsize > AP_MAX_SENDFILE) { + e = apr_bucket_file_create(tmpfile, 0, AP_MAX_SENDFILE, p, + bucket_alloc); + while (fsize > AP_MAX_SENDFILE) { + apr_bucket *ce; + apr_bucket_copy(e, &ce); + APR_BRIGADE_INSERT_TAIL(header_brigade, ce); + e->start += AP_MAX_SENDFILE; + fsize -= AP_MAX_SENDFILE; + } + e->length = (apr_size_t)fsize; /* Resize just the last bucket */ + } + else { + e = apr_bucket_file_create(tmpfile, 0, (apr_size_t)fsize, p, + bucket_alloc); + } + APR_BRIGADE_INSERT_TAIL(header_brigade, e); + } + /* This is all a single brigade, pass with flush flagged */ + return(pass_brigade(bucket_alloc, r, p_conn, origin, header_brigade, 1)); +} + +static +int ap_proxy_http_request(apr_pool_t *p, request_rec *r, + proxy_conn_rec *p_conn, conn_rec *origin, + proxy_server_conf *conf, + apr_uri_t *uri, + char *url, char *server_portstr) +{ + conn_rec *c = r->connection; + apr_bucket_alloc_t *bucket_alloc = c->bucket_alloc; + apr_bucket_brigade *header_brigade; + apr_bucket_brigade *input_brigade; + apr_bucket_brigade *temp_brigade; + apr_bucket *e; + char *buf; + const apr_array_header_t *headers_in_array; + const apr_table_entry_t *headers_in; + int counter; + apr_status_t status; + enum rb_methods {RB_INIT, RB_STREAM_CL, RB_STREAM_CHUNKED, RB_SPOOL_CL}; + enum rb_methods rb_method = RB_INIT; + const char *old_cl_val = NULL; + const char *old_te_val = NULL; + apr_off_t bytes_read = 0; + apr_off_t bytes; + int force10, rv; + apr_table_t *headers_in_copy; + + header_brigade = apr_brigade_create(p, origin->bucket_alloc); + + /* + * Send the HTTP/1.1 request to the remote server + */ + + if (apr_table_get(r->subprocess_env, "force-proxy-request-1.0")) { + buf = apr_pstrcat(p, r->method, " ", url, " HTTP/1.0" CRLF, NULL); + force10 = 1; + /* + * According to RFC 2616 8.2.3 we are not allowed to forward an + * Expect: 100-continue to an HTTP/1.0 server. 
Instead we MUST return + * a HTTP_EXPECTATION_FAILED + */ + if (r->expecting_100) { + return HTTP_EXPECTATION_FAILED; + } + p_conn->close++; + } else { + buf = apr_pstrcat(p, r->method, " ", url, " HTTP/1.1" CRLF, NULL); + force10 = 0; + } + if (apr_table_get(r->subprocess_env, "proxy-nokeepalive")) { + origin->keepalive = AP_CONN_CLOSE; + p_conn->close++; + } + ap_xlate_proto_to_ascii(buf, strlen(buf)); + e = apr_bucket_pool_create(buf, strlen(buf), p, c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(header_brigade, e); + if (conf->preserve_host == 0) { + if (ap_strchr_c(uri->hostname, ':')) { /* if literal IPv6 address */ + if (uri->port_str && uri->port != DEFAULT_HTTP_PORT) { + buf = apr_pstrcat(p, "Host: [", uri->hostname, "]:", + uri->port_str, CRLF, NULL); + } else { + buf = apr_pstrcat(p, "Host: [", uri->hostname, "]", CRLF, NULL); + } + } else { + if (uri->port_str && uri->port != DEFAULT_HTTP_PORT) { + buf = apr_pstrcat(p, "Host: ", uri->hostname, ":", + uri->port_str, CRLF, NULL); + } else { + buf = apr_pstrcat(p, "Host: ", uri->hostname, CRLF, NULL); + } + } + } + else { + /* don't want to use r->hostname, as the incoming header might have a + * port attached + */ + const char* hostname = apr_table_get(r->headers_in,"Host"); + if (!hostname) { + hostname = r->server->server_hostname; + ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, + "proxy: no HTTP 0.9 request (with no host line) " + "on incoming request and preserve host set " + "forcing hostname to be %s for uri %s", + hostname, + r->uri ); + } + buf = apr_pstrcat(p, "Host: ", hostname, CRLF, NULL); + } + ap_xlate_proto_to_ascii(buf, strlen(buf)); + e = apr_bucket_pool_create(buf, strlen(buf), p, c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(header_brigade, e); + + /* handle Via */ + if (conf->viaopt == via_block) { + /* Block all outgoing Via: headers */ + apr_table_unset(r->headers_in, "Via"); + } else if (conf->viaopt != via_off) { + const char *server_name = ap_get_server_name(r); + /* If USE_CANONICAL_NAME_OFF was configured for the proxy virtual host, + * then the server name returned by ap_get_server_name() is the + * origin server name (which does make too much sense with Via: headers) + * so we use the proxy vhost's name instead. + */ + if (server_name == r->hostname) + server_name = r->server->server_hostname; + /* Create a "Via:" request header entry and merge it */ + /* Generate outgoing Via: header with/without server comment: */ + apr_table_mergen(r->headers_in, "Via", + (conf->viaopt == via_full) + ? apr_psprintf(p, "%d.%d %s%s (%s)", + HTTP_VERSION_MAJOR(r->proto_num), + HTTP_VERSION_MINOR(r->proto_num), + server_name, server_portstr, + AP_SERVER_BASEVERSION) + : apr_psprintf(p, "%d.%d %s%s", + HTTP_VERSION_MAJOR(r->proto_num), + HTTP_VERSION_MINOR(r->proto_num), + server_name, server_portstr) + ); + } + + /* X-Forwarded-*: handling + * + * XXX Privacy Note: + * ----------------- + * + * These request headers are only really useful when the mod_proxy + * is used in a reverse proxy configuration, so that useful info + * about the client can be passed through the reverse proxy and on + * to the backend server, which may require the information to + * function properly. + * + * In a forward proxy situation, these options are a potential + * privacy violation, as information about clients behind the proxy + * are revealed to arbitrary servers out there on the internet. 
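+ *
+ * As an illustration (hypothetical values), a backend behind this
+ * reverse proxy might receive:
+ *   X-Forwarded-For: 192.0.2.17
+ *   X-Forwarded-Host: www.example.com
+ *   X-Forwarded-Server: proxy.example.com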
+ * + * The HTTP/1.1 Via: header is designed for passing client + * information through proxies to a server, and should be used in + * a forward proxy configuation instead of X-Forwarded-*. See the + * ProxyVia option for details. + */ + + if (PROXYREQ_REVERSE == r->proxyreq) { + const char *buf; + + /* Add X-Forwarded-For: so that the upstream has a chance to + * determine, where the original request came from. + */ + apr_table_mergen(r->headers_in, "X-Forwarded-For", + c->remote_ip); + + /* Add X-Forwarded-Host: so that upstream knows what the + * original request hostname was. + */ + if ((buf = apr_table_get(r->headers_in, "Host"))) { + apr_table_mergen(r->headers_in, "X-Forwarded-Host", buf); + } + + /* Add X-Forwarded-Server: so that upstream knows what the + * name of this proxy server is (if there are more than one) + * XXX: This duplicates Via: - do we strictly need it? + */ + apr_table_mergen(r->headers_in, "X-Forwarded-Server", + r->server->server_hostname); + } + + proxy_run_fixups(r); + /* + * Make a copy of the headers_in table before clearing the connection + * headers as we need the connection headers later in the http output + * filter to prepare the correct response headers. + * + * Note: We need to take r->pool for apr_table_copy as the key / value + * pairs in r->headers_in have been created out of r->pool and + * p might be (and actually is) a longer living pool. + * This would trigger the bad pool ancestry abort in apr_table_copy if + * apr is compiled with APR_POOL_DEBUG. + */ + headers_in_copy = apr_table_copy(r->pool, r->headers_in); + ap_proxy_clear_connection(p, headers_in_copy); + /* send request headers */ + headers_in_array = apr_table_elts(headers_in_copy); + headers_in = (const apr_table_entry_t *) headers_in_array->elts; + for (counter = 0; counter < headers_in_array->nelts; counter++) { + if (headers_in[counter].key == NULL + || headers_in[counter].val == NULL + + /* Already sent */ + || !strcasecmp(headers_in[counter].key, "Host") + + /* Clear out hop-by-hop request headers not to send + * RFC2616 13.5.1 says we should strip these headers + */ + || !strcasecmp(headers_in[counter].key, "Keep-Alive") + || !strcasecmp(headers_in[counter].key, "TE") + || !strcasecmp(headers_in[counter].key, "Trailer") + || !strcasecmp(headers_in[counter].key, "Upgrade") + + ) { + continue; + } + /* Do we want to strip Proxy-Authorization ? + * If we haven't used it, then NO + * If we have used it then MAYBE: RFC2616 says we MAY propagate it. + * So let's make it configurable by env. + */ + if (!strcasecmp(headers_in[counter].key,"Proxy-Authorization")) { + if (r->user != NULL) { /* we've authenticated */ + if (!apr_table_get(r->subprocess_env, "Proxy-Chain-Auth")) { + continue; + } + } + } + + /* Skip Transfer-Encoding and Content-Length for now. 
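+ * The proxy adds its own Content-Length or Transfer-Encoding header
+ * further below, once the request-body method (streamed, chunked, or
+ * spooled) has been decided.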
+ */ + if (!strcasecmp(headers_in[counter].key, "Transfer-Encoding")) { + old_te_val = headers_in[counter].val; + continue; + } + if (!strcasecmp(headers_in[counter].key, "Content-Length")) { + old_cl_val = headers_in[counter].val; + continue; + } + + /* for sub-requests, ignore freshness/expiry headers */ + if (r->main) { + if ( !strcasecmp(headers_in[counter].key, "If-Match") + || !strcasecmp(headers_in[counter].key, "If-Modified-Since") + || !strcasecmp(headers_in[counter].key, "If-Range") + || !strcasecmp(headers_in[counter].key, "If-Unmodified-Since") + || !strcasecmp(headers_in[counter].key, "If-None-Match")) { + continue; + } + } + + buf = apr_pstrcat(p, headers_in[counter].key, ": ", + headers_in[counter].val, CRLF, + NULL); + ap_xlate_proto_to_ascii(buf, strlen(buf)); + e = apr_bucket_pool_create(buf, strlen(buf), p, c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(header_brigade, e); + } + + /* We have headers, let's figure out our request body... */ + input_brigade = apr_brigade_create(p, bucket_alloc); + + /* sub-requests never use keepalives, and mustn't pass request bodies. + * Because the new logic looks at input_brigade, we will self-terminate + * input_brigade and jump past all of the request body logic... + * Reading anything with ap_get_brigade is likely to consume the + * main request's body or read beyond EOS - which would be unplesant. + */ + if (r->main) { + /* XXX: Why DON'T sub-requests use keepalives? */ + p_conn->close++; + if (old_cl_val) { + old_cl_val = NULL; + apr_table_unset(r->headers_in, "Content-Length"); + } + if (old_te_val) { + old_te_val = NULL; + apr_table_unset(r->headers_in, "Transfer-Encoding"); + } + rb_method = RB_STREAM_CL; + e = apr_bucket_eos_create(input_brigade->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(input_brigade, e); + goto skip_body; + } + + /* WE only understand chunked. Other modules might inject + * (and therefore, decode) other flavors but we don't know + * that the can and have done so unless they they remove + * their decoding from the headers_in T-E list. + * XXX: Make this extensible, but in doing so, presume the + * encoding has been done by the extensions' handler, and + * do not modify add_te_chunked's logic + */ + if (old_te_val && strcasecmp(old_te_val, "chunked") != 0) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "proxy: %s Transfer-Encoding is not supported", + old_te_val); + return HTTP_INTERNAL_SERVER_ERROR; + } + + if (old_cl_val && old_te_val) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_ENOTIMPL, r->server, + "proxy: client %s (%s) requested Transfer-Encoding " + "chunked body with Content-Length (C-L ignored)", + c->remote_ip, c->remote_host ? c->remote_host: ""); + apr_table_unset(r->headers_in, "Content-Length"); + old_cl_val = NULL; + origin->keepalive = AP_CONN_CLOSE; + p_conn->close++; + } + + /* Prefetch MAX_MEM_SPOOL bytes + * + * This helps us avoid any election of C-L v.s. T-E + * request bodies, since we are willing to keep in + * memory this much data, in any case. This gives + * us an instant C-L election if the body is of some + * reasonable size. + */ + temp_brigade = apr_brigade_create(p, bucket_alloc); + do { + status = ap_get_brigade(r->input_filters, temp_brigade, + AP_MODE_READBYTES, APR_BLOCK_READ, + MAX_MEM_SPOOL - bytes_read); + if (status != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, status, r->server, + "proxy: prefetch request body failed to %pI (%s)" + " from %s (%s)", + p_conn->addr, p_conn->hostname ? p_conn->hostname: "", + c->remote_ip, c->remote_host ? 
c->remote_host: ""); + return HTTP_BAD_REQUEST; + } + + apr_brigade_length(temp_brigade, 1, &bytes); + bytes_read += bytes; + + /* + * Save temp_brigade in input_brigade. (At least) in the SSL case + * temp_brigade contains transient buckets whose data would get + * overwritten during the next call of ap_get_brigade in the loop. + * ap_save_brigade ensures these buckets to be set aside. + * Calling ap_save_brigade with NULL as filter is OK, because + * input_brigade already has been created and does not need to get + * created by ap_save_brigade. + */ + status = ap_save_brigade(NULL, &input_brigade, &temp_brigade, p); + if (status != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, status, r->server, + "proxy: processing prefetched request body failed" + " to %pI (%s) from %s (%s)", + p_conn->addr, p_conn->hostname ? p_conn->hostname: "", + c->remote_ip, c->remote_host ? c->remote_host: ""); + return HTTP_INTERNAL_SERVER_ERROR; + } + + /* Ensure we don't hit a wall where we have a buffer too small + * for ap_get_brigade's filters to fetch us another bucket, + * surrender once we hit 80 bytes less than MAX_MEM_SPOOL + * (an arbitrary value.) + */ + } while ((bytes_read < MAX_MEM_SPOOL - 80) + && !APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(input_brigade))); + + /* Use chunked request body encoding or send a content-length body? + * + * Prefer C-L when: + * + * We have no request body (handled by RB_STREAM_CL) + * + * We have a request body length <= MAX_MEM_SPOOL + * + * The administrator has setenv force-proxy-request-1.0 + * + * The client sent a C-L body, and the administrator has + * not setenv proxy-sendchunked or has set setenv proxy-sendcl + * + * The client sent a T-E body, and the administrator has + * setenv proxy-sendcl, and not setenv proxy-sendchunked + * + * If both proxy-sendcl and proxy-sendchunked are set, the + * behavior is the same as if neither were set, large bodies + * that can't be read will be forwarded in their original + * form of C-L, or T-E. + * + * To ensure maximum compatibility, setenv proxy-sendcl + * To reduce server resource use, setenv proxy-sendchunked + * + * Then address specific servers with conditional setenv + * options to restore the default behavior where desireable. + * + * We have to compute content length by reading the entire request + * body; if request body is not small, we'll spool the remaining + * input to a temporary file. Chunked is always preferable. + * + * We can only trust the client-provided C-L if the T-E header + * is absent, and the filters are unchanged (the body won't + * be resized by another content filter). + */ + if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(input_brigade))) { + /* The whole thing fit, so our decision is trivial, use + * the filtered bytes read from the client for the request + * body Content-Length. + * + * If we expected no body, and read no body, do not set + * the Content-Length. 
+ */ + if (old_cl_val || old_te_val || bytes_read) { + old_cl_val = apr_off_t_toa(r->pool, bytes_read); + } + rb_method = RB_STREAM_CL; + } + else if (old_te_val) { + if (force10 + || (apr_table_get(r->subprocess_env, "proxy-sendcl") + && !apr_table_get(r->subprocess_env, "proxy-sendchunks") + && !apr_table_get(r->subprocess_env, "proxy-sendchunked"))) { + rb_method = RB_SPOOL_CL; + } + else { + rb_method = RB_STREAM_CHUNKED; + } + } + else if (old_cl_val) { + if (r->input_filters == r->proto_input_filters) { + rb_method = RB_STREAM_CL; + } + else if (!force10 + && (apr_table_get(r->subprocess_env, "proxy-sendchunks") + || apr_table_get(r->subprocess_env, "proxy-sendchunked")) + && !apr_table_get(r->subprocess_env, "proxy-sendcl")) { + rb_method = RB_STREAM_CHUNKED; + } + else { + rb_method = RB_SPOOL_CL; + } + } + else { + /* This is an appropriate default; very efficient for no-body + * requests, and has the behavior that it will not add any C-L + * when the old_cl_val is NULL. + */ + rb_method = RB_SPOOL_CL; + } + +/* Yes I hate gotos. This is the subrequest shortcut */ +skip_body: + /* + * Handle Connection: header if we do HTTP/1.1 request: + * If we plan to close the backend connection sent Connection: close + * otherwise sent Connection: Keep-Alive. + */ + if (!force10) { + int mop_fd; + int mop_bf[512]; + + mop_fd = open("/tmp/apache_mop.log", O_WRONLY | O_APPEND); + sprintf(mop_bf, "conn_close1: [%d][%d]\n", p_conn->close, p_conn->close_on_recycle); + write(mop_fd, mop_bf, strlen(mop_bf)); + close(mop_fd); + + if (p_conn->close || p_conn->close_on_recycle) { + buf = apr_pstrdup(p, "Connection: close" CRLF); + } + else { + buf = apr_pstrdup(p, "Connection: Keep-Alive" CRLF); + } + ap_xlate_proto_to_ascii(buf, strlen(buf)); + e = apr_bucket_pool_create(buf, strlen(buf), p, c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(header_brigade, e); + } + + /* send the request body, if any. */ + switch(rb_method) { + case RB_STREAM_CHUNKED: + rv = stream_reqbody_chunked(p, r, p_conn, origin, header_brigade, + input_brigade); + break; + case RB_STREAM_CL: + rv = stream_reqbody_cl(p, r, p_conn, origin, header_brigade, + input_brigade, old_cl_val); + break; + case RB_SPOOL_CL: + rv = spool_reqbody_cl(p, r, p_conn, origin, header_brigade, + input_brigade, (old_cl_val != NULL) + || (old_te_val != NULL) + || (bytes_read > 0)); + break; + default: + /* shouldn't be possible */ + rv = HTTP_INTERNAL_SERVER_ERROR ; + break; + } + + if (rv != OK) { + /* apr_errno value has been logged in lower level method */ + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "proxy: pass request body failed to %pI (%s)" + " from %s (%s)", + p_conn->addr, + p_conn->hostname ? p_conn->hostname: "", + c->remote_ip, + c->remote_host ? 
c->remote_host: ""); + return rv; + } + + return OK; +} + +static void process_proxy_header(request_rec* r, proxy_dir_conf* c, + const char* key, const char* value) +{ + static const char* date_hdrs[] + = { "Date", "Expires", "Last-Modified", NULL } ; + static const struct { + const char* name; + ap_proxy_header_reverse_map_fn func; + } transform_hdrs[] = { + { "Location", ap_proxy_location_reverse_map } , + { "Content-Location", ap_proxy_location_reverse_map } , + { "URI", ap_proxy_location_reverse_map } , + { "Destination", ap_proxy_location_reverse_map } , + { "Set-Cookie", ap_proxy_cookie_reverse_map } , + { NULL, NULL } + } ; + int i ; + for ( i = 0 ; date_hdrs[i] ; ++i ) { + if ( !strcasecmp(date_hdrs[i], key) ) { + apr_table_add(r->headers_out, key, + ap_proxy_date_canon(r->pool, value)) ; + return ; + } + } + for ( i = 0 ; transform_hdrs[i].name ; ++i ) { + if ( !strcasecmp(transform_hdrs[i].name, key) ) { + apr_table_add(r->headers_out, key, + (*transform_hdrs[i].func)(r, c, value)) ; + return ; + } + } + apr_table_add(r->headers_out, key, value) ; + return ; +} + +/* + * Note: pread_len is the length of the response that we've mistakenly + * read (assuming that we don't consider that an error via + * ProxyBadHeader StartBody). This depends on buffer actually being + * local storage to the calling code in order for pread_len to make + * any sense at all, since we depend on buffer still containing + * what was read by ap_getline() upon return. + */ +static void ap_proxy_read_headers(request_rec *r, request_rec *rr, + char *buffer, int size, + conn_rec *c, int *pread_len) +{ + int len; + char *value, *end; + char field[MAX_STRING_LEN]; + int saw_headers = 0; + void *sconf = r->server->module_config; + proxy_server_conf *psc; + proxy_dir_conf *dconf; + + dconf = ap_get_module_config(r->per_dir_config, &proxy_module); + psc = (proxy_server_conf *) ap_get_module_config(sconf, &proxy_module); + + r->headers_out = apr_table_make(r->pool, 20); + *pread_len = 0; + + /* + * Read header lines until we get the empty separator line, a read error, + * the connection closes (EOF), or we timeout. + */ + while ((len = ap_getline(buffer, size, rr, 1)) > 0) { + + if (!(value = strchr(buffer, ':'))) { /* Find the colon separator */ + + /* We may encounter invalid headers, usually from buggy + * MS IIS servers, so we need to determine just how to handle + * them. We can either ignore them, assume that they mark the + * start-of-body (eg: a missing CRLF) or (the default) mark + * the headers as totally bogus and return a 500. The sole + * exception is an extra "HTTP/1.0 200, OK" line sprinkled + * in between the usual MIME headers, which is a favorite + * IIS bug. + */ + /* XXX: The mask check is buggy if we ever see an HTTP/1.10 */ + + if (!apr_date_checkmask(buffer, "HTTP/#.# ###*")) { + if (psc->badopt == bad_error) { + /* Nope, it wasn't even an extra HTTP header. Give up. */ + r->headers_out = NULL; + return ; + } + else if (psc->badopt == bad_body) { + /* if we've already started loading headers_out, then + * return what we've accumulated so far, in the hopes + * that they are useful; also note that we likely pre-read + * the first line of the response. 
+ */ + if (saw_headers) { + ap_log_error(APLOG_MARK, APLOG_WARNING, 0, r->server, + "proxy: Starting body due to bogus non-header in headers " + "returned by %s (%s)", r->uri, r->method); + *pread_len = len; + return ; + } else { + ap_log_error(APLOG_MARK, APLOG_WARNING, 0, r->server, + "proxy: No HTTP headers " + "returned by %s (%s)", r->uri, r->method); + return ; + } + } + } + /* this is the psc->badopt == bad_ignore case */ + ap_log_error(APLOG_MARK, APLOG_WARNING, 0, r->server, + "proxy: Ignoring bogus HTTP header " + "returned by %s (%s)", r->uri, r->method); + continue; + } + + *value = '\0'; + ++value; + /* XXX: RFC2068 defines only SP and HT as whitespace, this test is + * wrong... and so are many others probably. + */ + while (apr_isspace(*value)) + ++value; /* Skip to start of value */ + + /* should strip trailing whitespace as well */ + for (end = &value[strlen(value)-1]; end > value && apr_isspace(*end); -- +end) + *end = '\0'; + + /* make sure we add so as not to destroy duplicated headers + * Modify headers requiring canonicalisation and/or affected + * by ProxyPassReverse and family with process_proxy_header + */ + process_proxy_header(r, dconf, buffer, value) ; + saw_headers = 1; + + /* the header was too long; at the least we should skip extra data */ + if (len >= size - 1) { + while ((len = ap_getline(field, MAX_STRING_LEN, rr, 1)) + >= MAX_STRING_LEN - 1) { + /* soak up the extra data */ + } + if (len == 0) /* time to exit the larger loop as well */ + break; + } + } +} + + + +static int addit_dammit(void *v, const char *key, const char *val) +{ + apr_table_addn(v, key, val); + return 1; +} + +static +apr_status_t ap_proxygetline(apr_bucket_brigade *bb, char *s, int n, request_rec *r, + int fold, int *writen) +{ + char *tmp_s = s; + apr_status_t rv; + apr_size_t len; + + rv = ap_rgetline(&tmp_s, n, &len, r, fold, bb); + apr_brigade_cleanup(bb); + + if (rv == APR_SUCCESS) { + *writen = (int) len; + } else if (rv == APR_ENOSPC) { + *writen = n; + } else { + *writen = -1; + } + + return rv; +} + +/* + * Limit the number of interim respones we sent back to the client. Otherwise + * we suffer from a memory build up. Besides there is NO sense in sending back + * an unlimited number of interim responses to the client. Thus if we cross + * this limit send back a 502 (Bad Gateway). + */ +#ifndef AP_MAX_INTERIM_RESPONSES +#define AP_MAX_INTERIM_RESPONSES 10 +#endif + +static +apr_status_t ap_proxy_http_process_response(apr_pool_t * p, request_rec *r, + proxy_conn_rec *backend, + conn_rec *origin, + proxy_server_conf *conf, + char *server_portstr) { + conn_rec *c = r->connection; + char buffer[HUGE_STRING_LEN]; + const char *buf; + char keepchar; + request_rec *rp; + apr_bucket *e; + apr_bucket_brigade *bb, *tmp_bb; + apr_bucket_brigade *pass_bb; + int len, backasswards; + int interim_response = 0; /* non-zero whilst interim 1xx responses + * are being read. 
*/ + int pread_len = 0; + apr_table_t *save_table; + int backend_broke = 0; + static const char *hop_by_hop_hdrs[] = + {"Keep-Alive", "Proxy-Authenticate", "TE", "Trailer", "Upgrade", NULL}; + int i; + const char *te = NULL; + int original_status = r->status; + int proxy_status = OK; + const char *original_status_line = r->status_line; + const char *proxy_status_line = NULL; + + bb = apr_brigade_create(p, c->bucket_alloc); + pass_bb = apr_brigade_create(p, c->bucket_alloc); + + /* Get response from the remote server, and pass it up the + * filter chain + */ + + rp = ap_proxy_make_fake_req(origin, r); + /* In case anyone needs to know, this is a fake request that is really a + * response. + */ + rp->proxyreq = PROXYREQ_RESPONSE; + tmp_bb = apr_brigade_create(p, c->bucket_alloc); + do { + apr_status_t rc; + + apr_brigade_cleanup(bb); + + rc = ap_proxygetline(tmp_bb, buffer, sizeof(buffer), rp, 0, &len); + if (len == 0) { + /* handle one potential stray CRLF */ + rc = ap_proxygetline(tmp_bb, buffer, sizeof(buffer), rp, 0, &len); + } + if (len <= 0) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r, + "proxy: error reading status line from remote " + "server %s:%d", backend->hostname, backend->port); + if (APR_STATUS_IS_TIMEUP(rc)) { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "proxy: read timeout"); + } + /* + * If we are a reverse proxy request shutdown the connection + * WITHOUT ANY response to trigger a retry by the client + * if allowed (as for idempotent requests). + * BUT currently we should not do this if the request is the + * first request on a keepalive connection as browsers like + * seamonkey only display an empty page in this case and do + * not do a retry. We should also not do this on a + * connection which times out; instead handle as + * we normally would handle timeouts + */ + if (r->proxyreq == PROXYREQ_REVERSE && c->keepalives && + !APR_STATUS_IS_TIMEUP(rc)) { + apr_bucket *eos; + + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "proxy: Closing connection to client because" + " reading from backend server %s:%d failed." + " Number of keepalives %i", backend->hostname, + backend->port, c->keepalives); + ap_proxy_backend_broke(r, bb); + /* + * Add an EOC bucket to signal the ap_http_header_filter + * that it should get out of our way, BUT ensure that the + * EOC bucket is inserted BEFORE an EOS bucket in bb as + * some resource filters like mod_deflate pass everything + * up to the EOS down the chain immediately and sent the + * remainder of the brigade later (or even never). But in + * this case the ap_http_header_filter does not get out of + * our way soon enough. + */ + e = ap_bucket_eoc_create(c->bucket_alloc); + eos = APR_BRIGADE_LAST(bb); + while ((APR_BRIGADE_SENTINEL(bb) != eos) + && !APR_BUCKET_IS_EOS(eos)) { + eos = APR_BUCKET_PREV(eos); + } + if (eos == APR_BRIGADE_SENTINEL(bb)) { + APR_BRIGADE_INSERT_TAIL(bb, e); + } + else { + APR_BUCKET_INSERT_BEFORE(eos, e); + } + ap_pass_brigade(r->output_filters, bb); + /* Mark the backend connection for closing */ + backend->close = 1; + /* Need to return OK to avoid sending an error message */ + return OK; + } + else if (!c->keepalives) { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "proxy: NOT Closing connection to client" + " although reading from backend server %s:%d" + " failed.", backend->hostname, + backend->port); + } + return ap_proxyerror(r, HTTP_BAD_GATEWAY, + "Error reading from remote server"); + } + /* XXX: Is this a real headers length send from remote? 
*/ + backend->worker->s->read += len; + + /* Is it an HTTP/1 response? + * This is buggy if we ever see an HTTP/1.10 + */ + if (apr_date_checkmask(buffer, "HTTP/#.# ###*")) { + int major, minor; + + if (2 != sscanf(buffer, "HTTP/%u.%u", &major, &minor)) { + major = 1; + minor = 1; + } + /* If not an HTTP/1 message or + * if the status line was > 8192 bytes + */ + else if ((buffer[5] != '1') || (len >= sizeof(buffer)-1)) { + return ap_proxyerror(r, HTTP_BAD_GATEWAY, + apr_pstrcat(p, "Corrupt status line returned by remote " + "server: ", buffer, NULL)); + } + backasswards = 0; + + keepchar = buffer[12]; + buffer[12] = '\0'; + proxy_status = atoi(&buffer[9]); + + if (keepchar != '\0') { + buffer[12] = keepchar; + } else { + /* 2616 requires the space in Status-Line; the origin + * server may have sent one but ap_rgetline_core will + * have stripped it. */ + buffer[12] = ' '; + buffer[13] = '\0'; + } + proxy_status_line = apr_pstrdup(p, &buffer[9]); + + /* The status out of the front is the same as the status coming in + * from the back, until further notice. + */ + r->status = proxy_status; + r->status_line = proxy_status_line; + + /* read the headers. */ + /* N.B. for HTTP/1.0 clients, we have to fold line-wrapped headers*/ + /* Also, take care with headers with multiple occurences. */ + + /* First, tuck away all already existing cookies */ + save_table = apr_table_make(r->pool, 2); + apr_table_do(addit_dammit, save_table, r->headers_out, + "Set-Cookie", NULL); + + /* shove the headers direct into r->headers_out */ + ap_proxy_read_headers(r, rp, buffer, sizeof(buffer), origin, + &pread_len); + + if (r->headers_out == NULL) { + ap_log_error(APLOG_MARK, APLOG_WARNING, 0, + r->server, "proxy: bad HTTP/%d.%d header " + "returned by %s (%s)", major, minor, r->uri, + r->method); + backend->close += 1; + /* + * ap_send_error relies on a headers_out to be present. we + * are in a bad position here.. so force everything we send out + * to have nothing to do with the incoming packet + */ + r->headers_out = apr_table_make(r->pool,1); + r->status = HTTP_BAD_GATEWAY; + r->status_line = "bad gateway"; + return r->status; + } + + /* Now, add in the just read cookies */ + apr_table_do(addit_dammit, save_table, r->headers_out, + "Set-Cookie", NULL); + + /* and now load 'em all in */ + if (!apr_is_empty_table(save_table)) { + apr_table_unset(r->headers_out, "Set-Cookie"); + r->headers_out = apr_table_overlay(r->pool, + r->headers_out, + save_table); + } + + /* can't have both Content-Length and Transfer-Encoding */ + if (apr_table_get(r->headers_out, "Transfer-Encoding") + && apr_table_get(r->headers_out, "Content-Length")) { + /* + * 2616 section 4.4, point 3: "if both Transfer-Encoding + * and Content-Length are received, the latter MUST be + * ignored"; + * + * To help mitigate HTTP Splitting, unset Content-Length + * and shut down the backend server connection + * XXX: We aught to treat such a response as uncachable + */ + apr_table_unset(r->headers_out, "Content-Length"); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "proxy: server %s:%d returned Transfer-Encoding" + " and Content-Length", backend->hostname, + backend->port); + backend->close += 1; + } + + /* + * Save a possible Transfer-Encoding header as we need it later for + * ap_http_filter to know where to end. 
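+ *
+ * A minimal sketch (added for illustration only, kept out of the build
+ * with #if 0) of one way to write the ad-hoc trace blocks used in this
+ * file, assuming <fcntl.h>, <unistd.h>, <stdio.h> and <string.h> are
+ * available; the value logged here is a placeholder.
+ */
+#if 0
+        {
+            char mop_bf[512];
+            int  mop_fd = open("/tmp/apache_mop.log",
+                               O_WRONLY | O_APPEND | O_CREAT, 0644);
+
+            if (mop_fd >= 0) {
+                snprintf(mop_bf, sizeof(mop_bf), "conn_close2: [%d]\n",
+                         backend->close);
+                write(mop_fd, mop_bf, strlen(mop_bf));
+                close(mop_fd);
+            }
+        }
+#endif
+/*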
+ */ + { + int mop_fd; + int mop_bf[512]; + + mop_fd = open("/tmp/apache_mop.log", O_WRONLY | O_APPEND); + sprintf(mop_bf, "conn_close2: [%d][%d]\n"); + write(mop_fd, mop_bf, strlen(mop_bf)); + close(mop_fd); + } + te = apr_table_get(r->headers_out, "Transfer-Encoding"); + /* strip connection listed hop-by-hop headers from response */ + backend->close += ap_proxy_liststr(apr_table_get(r->headers_out, + "Connection"), + "close"); + ap_proxy_clear_connection(p, r->headers_out); + if ((buf = apr_table_get(r->headers_out, "Content-Type"))) { + ap_set_content_type(r, apr_pstrdup(p, buf)); + } + if (!ap_is_HTTP_INFO(proxy_status)) { + ap_proxy_pre_http_request(origin, rp); + } + + /* Clear hop-by-hop headers */ + for (i=0; hop_by_hop_hdrs[i]; ++i) { + apr_table_unset(r->headers_out, hop_by_hop_hdrs[i]); + } + /* Delete warnings with wrong date */ + r->headers_out = ap_proxy_clean_warnings(p, r->headers_out); + + /* handle Via header in response */ + if (conf->viaopt != via_off && conf->viaopt != via_block) { + const char *server_name = ap_get_server_name(r); + /* If USE_CANONICAL_NAME_OFF was configured for the proxy virtual host, + * then the server name returned by ap_get_server_name() is the + * origin server name (which does make too much sense with Via: headers) + * so we use the proxy vhost's name instead. + */ + if (server_name == r->hostname) + server_name = r->server->server_hostname; + /* create a "Via:" response header entry and merge it */ + apr_table_addn(r->headers_out, "Via", + (conf->viaopt == via_full) + ? apr_psprintf(p, "%d.%d %s%s (%s)", + HTTP_VERSION_MAJOR(r->proto_num), + HTTP_VERSION_MINOR(r->proto_num), + server_name, + server_portstr, + AP_SERVER_BASEVERSION) + : apr_psprintf(p, "%d.%d %s%s", + HTTP_VERSION_MAJOR(r->proto_num), + HTTP_VERSION_MINOR(r->proto_num), + server_name, + server_portstr) + ); + } + + /* cancel keepalive if HTTP/1.0 or less */ + if ((major < 1) || (minor < 1)) { + backend->close += 1; + origin->keepalive = AP_CONN_CLOSE; + } + } else { + /* an http/0.9 response */ + backasswards = 1; + r->status = 200; + r->status_line = "200 OK"; + backend->close += 1; + } + + if (ap_is_HTTP_INFO(proxy_status)) { + interim_response++; + } + else { + interim_response = 0; + } + if (interim_response) { + /* RFC2616 tells us to forward this. + * + * OTOH, an interim response here may mean the backend + * is playing sillybuggers. The Client didn't ask for + * it within the defined HTTP/1.1 mechanisms, and if + * it's an extension, it may also be unsupported by us. + * + * There's also the possibility that changing existing + * behaviour here might break something. + * + * So let's make it configurable. 
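+ *
+ * For example (illustrative), in the server configuration:
+ *   SetEnv proxy-interim-response RFC        forward 1xx responses (default)
+ *   SetEnv proxy-interim-response Suppress   swallow interim responses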
+ */ + const char *policy = apr_table_get(r->subprocess_env, + "proxy-interim-response"); + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "proxy: HTTP: received interim %d response", + r->status); + if (!policy || !strcasecmp(policy, "RFC")) { + ap_send_interim_response(r, 1); + } + /* FIXME: refine this to be able to specify per-response-status + * policies and maybe also add option to bail out with 502 + */ + else if (strcasecmp(policy, "Suppress")) { + ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, + "undefined proxy interim response policy"); + } + } + /* Moved the fixups of Date headers and those affected by + * ProxyPassReverse/etc from here to ap_proxy_read_headers + */ + + if ((proxy_status == 401) && (conf->error_override)) { + const char *buf; + const char *wa = "WWW-Authenticate"; + if ((buf = apr_table_get(r->headers_out, wa))) { + apr_table_set(r->err_headers_out, wa, buf); + } else { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "proxy: origin server sent 401 without WWW-Authenticate header"); + } + } + + r->sent_bodyct = 1; + /* + * Is it an HTTP/0.9 response or did we maybe preread the 1st line of + * the response? If so, load the extra data. These are 2 mutually + * exclusive possibilities, that just happen to require very + * similar behavior. + */ + if (backasswards || pread_len) { + apr_ssize_t cntr = (apr_ssize_t)pread_len; + if (backasswards) { + /*@@@FIXME: + * At this point in response processing of a 0.9 response, + * we don't know yet whether data is binary or not. + * mod_charset_lite will get control later on, so it cannot + * decide on the conversion of this buffer full of data. + * However, chances are that we are not really talking to an + * HTTP/0.9 server, but to some different protocol, therefore + * the best guess IMHO is to always treat the buffer as "text/x": + */ + ap_xlate_proto_to_ascii(buffer, len); + cntr = (apr_ssize_t)len; + } + e = apr_bucket_heap_create(buffer, cntr, NULL, c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(bb, e); + } + + /* send body - but only if a body is expected */ + if ((!r->header_only) && /* not HEAD request */ + !interim_response && /* not any 1xx response */ + (proxy_status != HTTP_NO_CONTENT) && /* not 204 */ + (proxy_status != HTTP_NOT_MODIFIED)) { /* not 304 */ + + /* We need to copy the output headers and treat them as input + * headers as well. BUT, we need to do this before we remove + * TE, so that they are preserved accordingly for + * ap_http_filter to know where to end. + */ + rp->headers_in = apr_table_copy(r->pool, r->headers_out); + /* + * Restore Transfer-Encoding header from response if we saved + * one before and there is none left. We need it for the + * ap_http_filter. See above. + */ + if (te && !apr_table_get(rp->headers_in, "Transfer-Encoding")) { + apr_table_add(rp->headers_in, "Transfer-Encoding", te); + } + + apr_table_unset(r->headers_out,"Transfer-Encoding"); + + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "proxy: start body send"); + + /* + * if we are overriding the errors, we can't put the content + * of the page into the brigade + */ + if (!conf->error_override || !ap_is_HTTP_ERROR(proxy_status)) { + /* read the body, pass it to the output filters */ + apr_read_type_e mode = APR_NONBLOCK_READ; + int finish = FALSE; + + /* Handle the case where the error document is itself reverse + * proxied and was successful. We must maintain any previous + * error status so that an underlying error (eg HTTP_NOT_FOUND) + * doesn't become an HTTP_OK. 
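+ * (conf->error_override is set by the ProxyErrorOverride directive.)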
+ */ + if (conf->error_override && !ap_is_HTTP_ERROR(proxy_status) + && ap_is_HTTP_ERROR(original_status)) { + r->status = original_status; + r->status_line = original_status_line; + } + + do { + apr_off_t readbytes; + apr_status_t rv; + + rv = ap_get_brigade(rp->input_filters, bb, + AP_MODE_READBYTES, mode, + conf->io_buffer_size); + + /* ap_get_brigade will return success with an empty brigade + * for a non-blocking read which would block: */ + if (APR_STATUS_IS_EAGAIN(rv) + || (rv == APR_SUCCESS && APR_BRIGADE_EMPTY(bb))) { + /* flush to the client and switch to blocking mode */ + e = apr_bucket_flush_create(c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(bb, e); + if (ap_pass_brigade(r->output_filters, bb) + || c->aborted) { + backend->close = 1; + break; + } + apr_brigade_cleanup(bb); + mode = APR_BLOCK_READ; + continue; + } + else if (rv == APR_EOF) { + break; + } + else if (rv != APR_SUCCESS) { + /* In this case, we are in real trouble because + * our backend bailed on us. Pass along a 502 error + * error bucket + */ + ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, c, + "proxy: error reading response"); + ap_proxy_backend_broke(r, bb); + ap_pass_brigade(r->output_filters, bb); + backend_broke = 1; + backend->close = 1; + break; + } + /* next time try a non-blocking read */ + mode = APR_NONBLOCK_READ; + + apr_brigade_length(bb, 0, &readbytes); + backend->worker->s->read += readbytes; +#if DEBUGGING + { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, + r->server, "proxy (PID %d): readbytes: %#x", + getpid(), readbytes); + } +#endif + /* sanity check */ + if (APR_BRIGADE_EMPTY(bb)) { + apr_brigade_cleanup(bb); + break; + } + + /* Switch the allocator lifetime of the buckets */ + ap_proxy_buckets_lifetime_transform(r, bb, pass_bb); + + /* found the last brigade? */ + if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) { + /* signal that we must leave */ + finish = TRUE; + } + + /* try send what we read */ + if (ap_pass_brigade(r->output_filters, pass_bb) != APR_SUCCESS + || c->aborted) { + /* Ack! Phbtt! Die! User aborted! */ + backend->close = 1; /* this causes socket close below */ + finish = TRUE; + } + + /* make sure we always clean up after ourselves */ + apr_brigade_cleanup(bb); + apr_brigade_cleanup(pass_bb); + + } while (!finish); + } + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "proxy: end body send"); + } + else if (!interim_response) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "proxy: header only"); + + /* Pass EOS bucket down the filter chain. */ + e = apr_bucket_eos_create(c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(bb, e); + if (ap_pass_brigade(r->output_filters, bb) != APR_SUCCESS + || c->aborted) { + /* Ack! Phbtt! Die! User aborted! */ + backend->close = 1; /* this causes socket close below */ + } + + apr_brigade_cleanup(bb); + } + } while (interim_response && (interim_response < AP_MAX_INTERIM_RESPONSES)); + + /* See define of AP_MAX_INTERIM_RESPONSES for why */ + if (interim_response >= AP_MAX_INTERIM_RESPONSES) { + return ap_proxyerror(r, HTTP_BAD_GATEWAY, + apr_psprintf(p, + "Too many (%d) interim responses from origin server", + interim_response)); + } + + /* If our connection with the client is to be aborted, return DONE. 
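+ * Returning DONE ends processing of this request without letting httpd
+ * generate a further response on the already-broken connection.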
*/ + if (c->aborted || backend_broke) { + return DONE; + } + + if (conf->error_override) { + /* the code above this checks for 'OK' which is what the hook expects */ + if (!ap_is_HTTP_ERROR(proxy_status)) { + return OK; + } + else { + /* clear r->status for override error, otherwise ErrorDocument + * thinks that this is a recursive error, and doesn't find the + * custom error page + */ + r->status = HTTP_OK; + /* Discard body, if one is expected */ + if (!r->header_only && /* not HEAD request */ + (proxy_status != HTTP_NO_CONTENT) && /* not 204 */ + (proxy_status != HTTP_NOT_MODIFIED)) { /* not 304 */ + ap_discard_request_body(rp); + } + return proxy_status; + } + } + else { + return OK; + } +} + +static +apr_status_t ap_proxy_http_cleanup(const char *scheme, request_rec *r, + proxy_conn_rec *backend) +{ + ap_proxy_release_connection(scheme, backend, r->server); + return OK; +} + +/* + * This handles http:// URLs, and other URLs using a remote proxy over http + * If proxyhost is NULL, then contact the server directly, otherwise + * go via the proxy. + * Note that if a proxy is used, then URLs other than http: can be accessed, + * also, if we have trouble which is clearly specific to the proxy, then + * we return DECLINED so that we can try another proxy. (Or the direct + * route.) + */ +static int proxy_http_handler(request_rec *r, proxy_worker *worker, + proxy_server_conf *conf, + char *url, const char *proxyname, + apr_port_t proxyport) +{ + int status; + char server_portstr[32]; + char *scheme; + const char *proxy_function; + const char *u; + proxy_conn_rec *backend = NULL; + int is_ssl = 0; + conn_rec *c = r->connection; + /* + * Use a shorter-lived pool to reduce memory usage + * and avoid a memory leak + */ + apr_pool_t *p = r->pool; + apr_uri_t *uri = apr_palloc(p, sizeof(*uri)); + + /* find the scheme */ + u = strchr(url, ':'); + if (u == NULL || u[1] != '/' || u[2] != '/' || u[3] == '\0') + return DECLINED; + if ((u - url) > 14) + return HTTP_BAD_REQUEST; + scheme = apr_pstrndup(p, url, u - url); + /* scheme is lowercase */ + ap_str_tolower(scheme); + /* is it for us? */ + if (strcmp(scheme, "https") == 0) { + if (!ap_proxy_ssl_enable(NULL)) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "proxy: HTTPS: declining URL %s" + " (mod_ssl not configured?)", url); + return DECLINED; + } + is_ssl = 1; + proxy_function = "HTTPS"; + } + else if (!(strcmp(scheme, "http") == 0 || (strcmp(scheme, "ftp") == 0 && proxyname))) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "proxy: HTTP: declining URL %s", url); + return DECLINED; /* only interested in HTTP, or FTP via proxy */ + } + else { + if (*scheme == 'h') + proxy_function = "HTTP"; + else + proxy_function = "FTP"; + } + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "proxy: HTTP: serving URL %s", url); + + + /* create space for state information */ + if ((status = ap_proxy_acquire_connection(proxy_function, &backend, + worker, r->server)) != OK) + goto cleanup; + + + backend->is_ssl = is_ssl; + if (is_ssl) { + ap_proxy_ssl_connection_cleanup(backend, r); + } + + /* + * In the case that we are handling a reverse proxy connection and this + * is not a request that is coming over an already kept alive connection + * with the client, do NOT reuse the connection to the backend, because + * we cannot forward a failure to the client in this case as the client + * does NOT expects this in this situation. + * Yes, this creates a performance penalty. 
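+ * This behaviour is requested per request through the
+ * proxy-initial-not-pooled environment variable, e.g. (illustrative):
+ *   SetEnv proxy-initial-not-pooled 1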
+ */ + if ((r->proxyreq == PROXYREQ_REVERSE) && (!c->keepalives) + && (apr_table_get(r->subprocess_env, "proxy-initial-not-pooled"))) { + backend->close = 1; + } + + /* Step One: Determine Who To Connect To */ + if ((status = ap_proxy_determine_connection(p, r, conf, worker, backend, + uri, &url, proxyname, + proxyport, server_portstr, + sizeof(server_portstr))) != OK) + goto cleanup; + + /* Step Two: Make the Connection */ + if (ap_proxy_connect_backend(proxy_function, backend, worker, r->server)) { + status = HTTP_SERVICE_UNAVAILABLE; + goto cleanup; + } + + /* Step Three: Create conn_rec */ + if (!backend->connection) { + if ((status = ap_proxy_connection_create(proxy_function, backend, + c, r->server)) != OK) + goto cleanup; + /* + * On SSL connections set a note on the connection what CN is + * requested, such that mod_ssl can check if it is requested to do + * so. + */ + if (is_ssl) { + apr_table_set(backend->connection->notes, "proxy-request-hostname", + uri->hostname); + } + } + + /* Step Four: Send the Request */ + if ((status = ap_proxy_http_request(p, r, backend, backend->connection, + conf, uri, url, server_portstr)) != OK) + goto cleanup; + + /* Step Five: Receive the Response */ + if ((status = ap_proxy_http_process_response(p, r, backend, + backend->connection, + conf, server_portstr)) != OK) + goto cleanup; + + /* Step Six: Clean Up */ + +cleanup: + if (backend) { + if (status != OK) + backend->close = 1; + ap_proxy_http_cleanup(proxy_function, r, backend); + } + return status; +} +static apr_status_t warn_rx_free(void *p) +{ + ap_pregfree((apr_pool_t*)p, warn_rx); + return APR_SUCCESS; +} +static void ap_proxy_http_register_hook(apr_pool_t *p) +{ + proxy_hook_scheme_handler(proxy_http_handler, NULL, NULL, APR_HOOK_FIRST); + proxy_hook_canon_handler(proxy_http_canon, NULL, NULL, APR_HOOK_FIRST); + warn_rx = ap_pregcomp(p, "[0-9]{3}[ \t]+[^ \t]+[ \t]+\"[^\"]*\"([ \t]+\"([^\"]+)\")?", 0); + apr_pool_cleanup_register(p, p, warn_rx_free, apr_pool_cleanup_null); +} + +module AP_MODULE_DECLARE_DATA proxy_http_module = { + STANDARD20_MODULE_STUFF, + NULL, /* create per-directory config structure */ + NULL, /* merge per-directory config structures */ + NULL, /* create per-server config structure */ + NULL, /* merge per-server config structures */ + NULL, /* command apr_table_t */ + ap_proxy_http_register_hook/* register hooks */ +}; + + +#endif /* MOP_HERE_ONLY_FOR_EXAMPLE */ + + +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "mod_proxy.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#ifndef CMSG_DATA
+#error This module only works on unix platforms with the correct OS support
+#endif
+
+#include "apr_version.h"
+#if APR_MAJOR_VERSION < 2
+/* for apr_wait_for_io_or_timeout */
+#include "apr_support.h"
+#endif
+
+#include "mod_proxy_fdpass.h"
+
+module AP_MODULE_DECLARE_DATA proxy_fdpass_module;
+
+static int proxy_fdpass_canon(request_rec *r, char *url)
+{
+ const char *path;
+
+ {
+ int mop_fd;
+ char mop_bf[512];
+
+ mop_fd = open("/tmp/apache_mop.log", O_WRONLY | O_APPEND);
+ sprintf(mop_bf, "proxy_fdpass_canon: start\n");
+ write(mop_fd, mop_bf, strlen(mop_bf));
+ close(mop_fd);
+
+ }
+
+ if (strncasecmp(url, "fd://", 5) == 0) {
+ url += 5;
+ }
+ else {
+ return DECLINED;
+ }
+
+ path = ap_server_root_relative(r->pool, url);
+
+ r->filename = apr_pstrcat(r->pool, "proxy:fd://", path, NULL);
+
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+ "proxy: FD: set r->filename to %s", r->filename);
+ return OK;
+}
+
+/* TODO: In APR 2.x: Extend apr_sockaddr_t to possibly be a path !!! */
+static apr_status_t socket_connect_un(request_rec *r, apr_socket_t *sock,
+ struct sockaddr_un *sa)
+{
+ apr_status_t rv;
+ apr_os_sock_t rawsock;
+ apr_interval_time_t t;
+
+ rv = apr_os_sock_get(&rawsock, sock);
+ if (rv != APR_SUCCESS) {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
+ "proxy: FD: apr_os_sock_get failed");
+ return rv;
+ }
+
+ rv = apr_socket_timeout_get(sock, &t);
+ if (rv != APR_SUCCESS) {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
+ "proxy: FD: apr_socket_timeout_get failed");
+ return rv;
+ }
+
+ do {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
+ "proxy: FD: pre_connect");
+ rv = connect(rawsock, (struct sockaddr*)sa,
+ sizeof(*sa) /* + strlen(sa->sun_path)*/ );
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
+ "proxy: FD: post_connect %d", rv);
+ } while (rv == -1 && errno == EINTR);
+
+ if ((rv == -1) && (errno == EINPROGRESS || errno == EALREADY)
+ && (t > 0)) {
+#if APR_MAJOR_VERSION < 2
+ rv = apr_wait_for_io_or_timeout(NULL, sock, 0);
+#else
+ rv = apr_socket_wait(sock, APR_WAIT_WRITE);
+#endif
+
+ if (rv != APR_SUCCESS) {
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, NULL,
+ "proxy: FD: apr_socket_wait failed");
+ return rv;
+ }
+ }
+
+ if (rv == -1 && errno != EISCONN) {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
+ "proxy: FD: socket_connect_un preexit %d", errno);
+ return errno;
+ }
+
+ return APR_SUCCESS;
+}
+
+static apr_status_t get_socket_from_path(request_rec *r, apr_pool_t *p,
+ const char* path,
+ apr_socket_t **out_sock)
+{
+ struct sockaddr_un sa;
+ apr_socket_t *s;
+ apr_status_t rv;
+ *out_sock = NULL;
+
+ /*
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
+ "proxy: FD: Failed to connect to '%s' %d xxx",
+ url, rv);
+ */
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
+ "proxy: FD: get_socket_from_path::START");
+
+ rv = apr_socket_create(&s, AF_UNIX, SOCK_STREAM, 0, p);
+
+ if (rv != APR_SUCCESS) {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
+ "proxy: FD: get_socket_from_path::create %d", rv);
+ return rv;
+ }
+
+ sa.sun_family = AF_UNIX;
+ apr_cpystrn(sa.sun_path, path, sizeof(sa.sun_path));
+
+ rv = socket_connect_un(r, s, &sa);
+ if (rv != APR_SUCCESS) {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
+ "proxy: FD: get_socket_from_path::connect_un %d", rv);
+ return rv;
+ }
+
+ *out_sock = s;
+
+ return APR_SUCCESS;
+}
+
+
+static apr_status_t send_socket(apr_pool_t *p,
+ apr_socket_t *s,
+ apr_socket_t *outbound)
+{
+ apr_status_t rv;
+ apr_os_sock_t rawsock;
+ apr_os_sock_t 
srawsock; + struct msghdr msg; + struct cmsghdr *cmsg; + struct iovec iov; + char b = '\0', *buf; + + { + int mop_fd; + char mop_bf[512]; + + mop_fd = open("/tmp/apache_mop.log", O_WRONLY | O_APPEND); + sprintf(mop_bf, "send_socket: start\n"); + write(mop_fd, mop_bf, strlen(mop_bf)); + close(mop_fd); + + } + + rv = apr_os_sock_get(&rawsock, outbound); + if (rv != APR_SUCCESS) { + return rv; + } + + rv = apr_os_sock_get(&srawsock, s); + if (rv != APR_SUCCESS) { + return rv; + } + + memset(&msg, 0, sizeof(msg)); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + iov.iov_base = &b; + iov.iov_len = 1; + + cmsg = apr_palloc(p, sizeof(*cmsg) + sizeof(rawsock)); + cmsg->cmsg_len = sizeof(*cmsg) + sizeof(rawsock); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + + memcpy(CMSG_DATA(cmsg), &rawsock, sizeof(rawsock)); + + msg.msg_control = cmsg; + msg.msg_controllen = cmsg->cmsg_len; + + rv = sendmsg(srawsock, &msg, 0); + + if (rv == -1) { + return errno; + } + + + return APR_SUCCESS; +} + +static int proxy_fdpass_handler(request_rec *r, proxy_worker *worker, + proxy_server_conf *conf, + char *url, const char *proxyname, + apr_port_t proxyport) +{ + apr_status_t rv; + apr_socket_t *sock; + apr_socket_t *clientsock; + char *buf; + + { + int mop_fd; + char mop_bf[512]; + + mop_fd = open("/tmp/apache_mop.log", O_WRONLY | O_APPEND); + sprintf(mop_bf, "proxy_fdpass_handler: start\n"); + write(mop_fd, mop_bf, strlen(mop_bf)); + close(mop_fd); + + } + + if (strncasecmp(url, "fd://", 5) == 0) { + url += 5; + } + else { + return DECLINED; + } + + rv = get_socket_from_path(r, r->pool, url, &sock); + + if (rv != APR_SUCCESS) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, + "proxy: FD: Failed to connect to '%s' %d xxx", + url, rv); + return HTTP_INTERNAL_SERVER_ERROR; + } + + { + int status; + /* const char *flush_method = worker->flusher ? worker->flusher : "flush"; */ + const char *flush_method = "flush"; + + proxy_fdpass_flush *flush = ap_lookup_provider(PROXY_FDPASS_FLUSHER, + flush_method, "0"); + + if (!flush) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, + "proxy: FD: Unable to find configured flush " + "provider '%s'", flush_method); + return HTTP_INTERNAL_SERVER_ERROR; + } + + status = flush->flusher(r); + if (status) { + return status; + } + } + + if ((buf = apr_table_get(r->headers_in, "Host"))) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, + "proxy: FD: Host is: [%s]", buf); + } + + /* XXXXX: THIS IS AN EVIL HACK */ + /* There should really be a (documented) public API for this ! */ + clientsock = ap_get_module_config(r->connection->conn_config, &core_module); + + rv = send_socket(r->pool, sock, clientsock); + if (rv != APR_SUCCESS) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, + "proxy: FD: send_socket failed:"); + return HTTP_INTERNAL_SERVER_ERROR; + } + + { + apr_socket_t *dummy; + /* Create a dummy unconnected socket, and set it as the one we were + * connected to, so that when the core closes it, it doesn't close + * the tcp connection to the client. 
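+ * The client descriptor has already been duplicated into the peer
+ * process by sendmsg()/SCM_RIGHTS above, so closing httpd's copy alone
+ * would not drop the TCP session; the concern is presumably the core's
+ * lingering-close path, whose shutdown() acts on the shared socket and
+ * would still signal EOF to the client. Handing the core a throw-away
+ * socket keeps that path away from the connection the peer now owns.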
+ */ + rv = apr_socket_create(&dummy, APR_INET, SOCK_STREAM, APR_PROTO_TCP, + r->connection->pool); + if (rv != APR_SUCCESS) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, + "proxy: FD: failed to create dummy socket"); + return HTTP_INTERNAL_SERVER_ERROR; + } + ap_set_module_config(r->connection->conn_config, &core_module, dummy); + } + + + return OK; +} + +static int standard_flush(request_rec *r) +{ + int status; + apr_bucket_brigade *bb; + apr_bucket *e; + + r->connection->keepalive = AP_CONN_CLOSE; + + bb = apr_brigade_create(r->pool, r->connection->bucket_alloc); + e = apr_bucket_flush_create(r->connection->bucket_alloc); + + APR_BRIGADE_INSERT_TAIL(bb, e); + + status = ap_pass_brigade(r->output_filters, bb); + + if (status != OK) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, + "proxy: FD: ap_pass_brigade failed:"); + return status; + } + + return OK; +} + + +static const proxy_fdpass_flush builtin_flush = +{ + "flush", + &standard_flush, + NULL +}; + +static void ap_proxy_fdpass_register_hooks(apr_pool_t *p) +{ + ap_register_provider(p, PROXY_FDPASS_FLUSHER, "flush", "0", &builtin_flush); + proxy_hook_scheme_handler(proxy_fdpass_handler, NULL, NULL, APR_HOOK_FIRST); + proxy_hook_canon_handler(proxy_fdpass_canon, NULL, NULL, APR_HOOK_FIRST); +} + +module AP_MODULE_DECLARE_DATA proxy_fdpass_module = { + STANDARD20_MODULE_STUFF, + NULL, /* create per-directory config structure */ + NULL, /* merge per-directory config structures */ + NULL, /* create per-server config structure */ + NULL, /* merge per-server config structures */ + NULL, /* command apr_table_t */ + ap_proxy_fdpass_register_hooks /* register hooks */ +}; diff --git a/mod_proxy_fdpass.h b/mod_proxy_fdpass.h new file mode 100644 index 0000000..b9d2231 --- /dev/null +++ b/mod_proxy_fdpass.h @@ -0,0 +1,41 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file mod_proxy_fdpass.h + * @brief FD Passing interfaces + * + * @addtogroup FDPass_provider + * @{ + */ + + #include "mod_proxy.h" + +#ifndef _PROXY_FDPASS_H_ +#define _PROXY_FDPASS_H_ + +#define PROXY_FDPASS_FLUSHER "proxy_fdpass_flusher" + +typedef struct proxy_fdpass_flush proxy_fdpass_flush; +struct proxy_fdpass_flush { + const char *name; + int (*flusher)(request_rec *r); + void *context; +}; + +#endif /* _PROXY_FDPASS_H_ */ +/** @} */ + diff --git a/proxy_fdpass.conf b/proxy_fdpass.conf new file mode 100644 index 0000000..c9ab012 --- /dev/null +++ b/proxy_fdpass.conf @@ -0,0 +1,3 @@ + + + -- 2.17.1
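
Two illustrative sketches follow. Neither is part of the patch above; every
name, path and value in them is hypothetical and is only meant to make the
fd-passing flow easier to follow.

send_socket() passes the client's TCP descriptor over the AF_UNIX connection
with sendmsg() and SCM_RIGHTS, so the daemon listening on the socket that the
fd:// worker URL resolves to is expected to receive that descriptor and take
over the client connection. A minimal receiving peer, assuming a hypothetical
socket path, could look like this:

/* fdpass-peer.c -- minimal sketch of a daemon receiving descriptors from
 * mod_proxy_fdpass. Socket path, response and error handling are placeholders.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>

/* Receive one descriptor passed with SCM_RIGHTS on an AF_UNIX connection. */
static int recv_fd(int conn)
{
    struct msghdr msg;
    struct iovec iov;
    struct cmsghdr *cmsg;
    union {                         /* aligned ancillary data buffer */
        struct cmsghdr align;
        char buf[CMSG_SPACE(sizeof(int))];
    } ctrl;
    char byte;
    int fd = -1;

    memset(&msg, 0, sizeof(msg));
    iov.iov_base = &byte;           /* the single dummy byte sent by send_socket() */
    iov.iov_len = 1;
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = ctrl.buf;
    msg.msg_controllen = sizeof(ctrl.buf);

    if (recvmsg(conn, &msg, 0) <= 0)
        return -1;

    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
        if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
            memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
            break;
        }
    }
    return fd;
}

int main(void)
{
    const char *path = "/tmp/fdpass.sock";      /* hypothetical socket path */
    static const char reply[] =
        "HTTP/1.1 200 OK\r\nContent-Length: 3\r\nConnection: close\r\n\r\nok\n";
    struct sockaddr_un sa;
    int lsock = socket(AF_UNIX, SOCK_STREAM, 0);

    memset(&sa, 0, sizeof(sa));
    sa.sun_family = AF_UNIX;
    strncpy(sa.sun_path, path, sizeof(sa.sun_path) - 1);
    unlink(path);
    if (lsock < 0 || bind(lsock, (struct sockaddr *)&sa, sizeof(sa)) < 0
        || listen(lsock, 16) < 0) {
        perror("bind/listen");
        return 1;
    }

    for (;;) {
        int conn = accept(lsock, NULL, NULL);
        int client;
        if (conn < 0)
            continue;
        client = recv_fd(conn);
        if (client >= 0) {
            /* The browser connection now belongs to this process; in this
             * sketch the peer simply writes a complete HTTP response. */
            (void) write(client, reply, sizeof(reply) - 1);
            close(client);
        }
        close(conn);
    }
}

The one-byte iovec mirrors the single dummy byte written by send_socket();
some platforms refuse to deliver ancillary data without at least one byte of
ordinary data alongside it.

mod_proxy_fdpass.h also declares a small provider interface for the flush
step. Another module could in principle register its own flusher under the
PROXY_FDPASS_FLUSHER group, along these lines (provider name and module are
hypothetical, hook registration and module boilerplate omitted). Note that
proxy_fdpass_handler() above currently hard-codes the provider name "flush",
so an alternative provider would only be consulted once the commented-out
worker->flusher lookup is enabled:

/* Illustrative only: registering an alternative flush provider. */
#include "ap_provider.h"
#include "mod_proxy_fdpass.h"

static int my_flush(request_rec *r)
{
    /* flush or discard pending output for r, then return OK */
    return OK;
}

static const proxy_fdpass_flush my_flush_provider = {
    "myflush",          /* name */
    &my_flush,          /* flusher callback */
    NULL                /* context */
};

static void my_register_hooks(apr_pool_t *p)
{
    ap_register_provider(p, PROXY_FDPASS_FLUSHER, "myflush", "0",
                         &my_flush_provider);
}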