diff options
| author | 2026-02-20 19:06:13 +0800 | |
|---|---|---|
| committer | 2026-02-20 19:07:14 +0800 | |
| commit | aa513c069c1418734aea894dc944e27c6a78a3bb (patch) | |
| tree | 687f0a11bb550fa088fd82a98ceb8979bbc35f69 | |
| parent | Comment on prior reverts removing the pack writing API (diff) | |
| signature | No signature | |
Delete everything; I'm redesigning this.
I'll stop using a flat package and make things much more modular.
I'll also experiment with streaming APIs so that large blobs don't cause out-of-memory (OOM) failures.
78 files changed, 0 insertions, 11459 deletions
diff --git a/.builds/alpine.yml b/.builds/alpine.yml deleted file mode 100644 index e0bd8ad3..00000000 --- a/.builds/alpine.yml +++ /dev/null @@ -1,29 +0,0 @@ -image: alpine/edge -packages: - - golangci-lint - - go -tasks: - - build: | - cd furgit - go build - - test-sha256: | - cd furgit - go test -v ./... - - test-sha1: | - cd furgit - go test -v -tags sha1 ./... - - test-race-sha256: | - cd furgit - go test -race -v ./... - - test-race-sha1: | - cd furgit - go test -race -v -tags sha1 ./... - - test-purego-sha256: | - cd furgit - go test -v -tags purego ./... - - lint: | - cd furgit - golangci-lint run ./... - - vet: | - cd furgit - go vet ./... diff --git a/LICENSE b/LICENSE deleted file mode 100644 index be3f7b28..00000000 --- a/LICENSE +++ /dev/null @@ -1,661 +0,0 @@ - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. - - When we speak of free software, we are referring to freedom, not -price. 
Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. - - The precise terms and conditions for copying, distribution and -modification follow. 
- - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. 
- - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. 
- - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. 
- - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. 
This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. 
- - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. 
- - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. 
- - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. 
For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code. There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -<https://www.gnu.org/licenses/>. diff --git a/README.md b/README.md deleted file mode 100644 index d14fc81c..00000000 --- a/README.md +++ /dev/null @@ -1,198 +0,0 @@ -# Furgit - -[](https://builds.sr.ht/~runxiyu/furgit) -[](https://pkg.go.dev/codeberg.org/lindenii/furgit) - -Furgit is a fast Git library in pure Go -(and a little bit of optional Go Assembly). - -## Project status - -* Initial development -* Poor code quality -* Frequent breaking changes -* Do not use in production -* Will likely use [Semantic Versioning 2.0.0](https://semver.org/spec/v2.0.0.html) later - -## Current features - -* SHA-256 and SHA-1\ - (runtime supports both; tests are SHA-256 by default, - but the `sha1` build tag makes it test SHA-1) -* Reading loose objects -* Writing loose objects -* Reading packfiles -* General support for blobs, trees, commits, and tags - -## Future features - -* Compression algorithm agility -* Multi pack indexes -* Repack -* [commit-graph](https://git-scm.com/docs/commit-graph) -* Network protocols -* Reftables -* Large object promisors? -* Large binary database format or something -* And much more - -## General goals - -Furgit intends to be a general-purpose Git library. 
- -For now, Furgit primarily prioritize APIs and optimizations that are -likely to be used by software development forges and other -server-side usages; in particular, Furgit follows the needs of -[Villosa](https://villosa.lindenii.org/villosa//repos/villosa/) and -to some extent [tangled](https://tangled.org/@tangled.org/core). - -## Performance optimizations - -* Aggressive pooling of byte buffers -* Aggressive pooling of custom zlib readers -* Minor SIMD optimizations for Adler-32 -* Memory-mapping packfiles and their indexes - -## Performance - -See [gitbench](https://git.sr.ht/~runxiyu/gitbench) for details on methods. - -All tests below were run on `linux.git` with `HEAD` at `6da43bbeb6918164` -on a `Intel(R) Core(TM) i5-10210U CPU @ 1.60GHz`. - -| Task | [git](https://git-scm.com) | Furgit | [libgit2](https://libgit2.org) | [go-git](https://github.com/go-git/go-git) | -| - | - | - | - | - | -| Traversing all trees | 0.1s | 9s | 19s | 122s | -| Traversing the root tree | 4ms | 1ms | 11ms | 1800ms | - -**Note:** go-git is expected to perform much better after -[storage: filesystem/mmap, Add PackScanner to handle large repos](https://github.com/go-git/go-git/pull/1776). - -## Architectural considerations - -Furgit heavily relies on memory mappings of packfiles, and assume relatively -predictable fault handling behavior. In distributed systems, we advise *not* -using Furgit on top of distributed network filesystems such as CephFS or NFS; -consider solutions where redundancy and distributions belong *above* the Git -layer, e.g., using an RPC protocol over a set of Git nodes each running Furgit -on local repositories. - -## Dependencies - -* The standard library -* Some things from `golang.org/x` -* `github.com/cespare/xxhash/v2` (may move in-tree at some point) - -Some external code is also introduced and maintained in-tree. - -## Environment requirements - -A standard UNIX-like filesystem with -[syscall.Mmap](https://pkg.go.dev/syscall#Mmap) is expected. 
- -## Repos and mirrors - -* [Codeberg](https://codeberg.org/lindenii/furgit) (with the canonical issue tracker) -* [SourceHut mirror](https://git.sr.ht/~runxiyu/furgit) -* [tangled mirror](https://tangled.org/@runxiyu.tngl.sh/furgit) -* [GitHub mirror](https://github.com/runxiyu/furgit) - -## Community - -* [#lindenii](https://webirc.runxiyu.org/kiwiirc/#lindenii) - on [irc.runxiyu.org](https://irc.runxiyu.org) -* [#lindenii](https://web.libera.chat/#lindenii) - on [Libera.Chat](https://libera.chat) - -## History and lineage - -* I wrote Lindenii Forge -* I wrote [hare-git](https://codeberg.org/lindenii/hare-git) -* I wanted a faster Git library for - [Lindenii Villosa](https://codeberg.org/lindenii/villosa) - the next generation of Lindenii Forge -* I translated hare-git and put it into `internal/common/git` in Villosa -* I extracted it out into a general-purpose library, which is what we - have now -* I was thinking of names and I accidentally typed "git" as "fur" (i.e., left - shifted one key on my QWERTY keyboard), so, "Furgit" - -## Reporting bugs - -All problem/bug reports should include a reproduction recipe in form -of a Go program which starts out with an empty repository and runs a -series of Furgit functions/methods and/or Git commands to trigger the -problem, be it a crash or some other undesirable behavior. - -Please take this request very seriously; Ask for help with writing your -regression test before asking for your problem to be fixed. Time invested in -writing a regression test saves time wasted on back-and-forth discussion about -how the problem can be reproduced. A regression test will need to be written in -any case to verify a fix and prevent the problem from resurfacing. - -If writing an automated test really turns out to be impossible, please -explain in very clear terms how the problem can be reproduced. - -## License - -This project is licensed under the GNU Affero General Public License, -Version 3.0 only. 
- -Pursuant to Section 14 of the GNU Affero General Public License, Version 3.0, -[Runxi Yu](https://runxiyu.org) is hereby designated as the proxy who is -authorized to issue a public statement accepting any future version of the -GNU Affero General Public License for use with this Program. - -Therefore, notwithstanding the specification that this Program is licensed -under the GNU Affero General Public License, Version 3.0 only, a public -acceptance by the Designated Proxy of any subsequent version of the GNU Affero -General Public License shall permanently authorize the use of that accepted -version for this Program. - -For the purposes of the Developer Certificate of Origin, the "open source -license" refers to the GNU Affero General Public License, Version 3.0, with the -above proxy designation pursuant to Section 14. - -All contributors are required to "sign-off" their commits (using `git commit --s`) to indicate that they have agreed to the [Developer Certificate of -Origin](https://developercertificate.org), reproduced below. - -``` -Developer Certificate of Origin -Version 1.1 - -Copyright (C) 2004, 2006 The Linux Foundation and its contributors. -1 Letterman Drive -Suite D4700 -San Francisco, CA, 94129 - -Everyone is permitted to copy and distribute verbatim copies of this -license document, but changing it is not allowed. 
- - -Developer's Certificate of Origin 1.1 - -By making a contribution to this project, I certify that: - -(a) The contribution was created in whole or in part by me and I - have the right to submit it under the open source license - indicated in the file; or - -(b) The contribution is based upon previous work that, to the best - of my knowledge, is covered under an appropriate open source - license and I have the right under that license to submit that - work with modifications, whether created in whole or in part - by me, under the same open source license (unless I am - permitted to submit under a different license), as indicated - in the file; or - -(c) The contribution was provided directly to me by some other - person who certified (a), (b) or (c) and I have not modified - it. - -(d) I understand and agree that this project and the contribution - are public and that a record of the contribution (including all - personal information I submit with it, including my sign-off) is - maintained indefinitely and may be redistributed consistent with - this project or the open source license(s) involved. -``` diff --git a/TODO.md b/TODO.md deleted file mode 100644 index 5dd58cd4..00000000 --- a/TODO.md +++ /dev/null @@ -1,18 +0,0 @@ -## Internal to-do list - -* Revamp error handling completely. -* Consider adding repository methods that attempt to resolve objects - of a particular type. They would attempt to resolve the object's - header and return an error if the type mismatches; if it matches, - they continue from that point (passing along some state such as - the packLocation to avoid re-resolving the location from index - files). -* Consider making Ref an interface satisfied by concrete RefDetached, - RefSymbolic. -* Consider adding compression agility. -* There may be some cases where integer overflows are handled - incorrectly. -* Use https://pkg.go.dev/simd/archsimd@go1.26rc1 for SIMD instead of - assembly. -* Add a function to insert an entry into a tree. 
-* Study https://gitlab.com/groups/gitlab-org/-/epics/20716 diff --git a/cmd/show-object/main.go b/cmd/show-object/main.go deleted file mode 100644 index 591710d0..00000000 --- a/cmd/show-object/main.go +++ /dev/null @@ -1,39 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "log" - - "codeberg.org/lindenii/furgit" -) - -func main() { - repoPath := flag.String("r", "", "path to repo (.git or bare)") - ref := flag.String("h", "", "ref or hash") - flag.Parse() - - if *repoPath == "" || *ref == "" { - log.Fatal("must provide -r repo and -h ref/hash") - } - - repo, err := furgit.OpenRepository(*repoPath) - if err != nil { - log.Fatalf("open repo: %v", err) - } - defer func() { - _ = repo.Close() - }() - - h, err := repo.ResolveRefFully(*ref) - if err != nil { - log.Fatalf("resolve ref: %v", err) - } - - obj, err := repo.ReadObject(h.Hash) - if err != nil { - log.Fatalf("read object: %v", err) - } - - fmt.Printf("%#v\n", obj) -} diff --git a/config/config.go b/config/config.go deleted file mode 100644 index 1344c890..00000000 --- a/config/config.go +++ /dev/null @@ -1,498 +0,0 @@ -// Package config provides routines to parse Git configuration files. -package config - -import ( - "bufio" - "bytes" - "errors" - "fmt" - "io" - "strings" - "unicode" -) - -// Config holds all parsed configuration entries from a Git config file. -// -// A Config preserves the ordering of entries as they appeared in the source. -// -// Lookups are matched case-insensitively for section and key names, and -// subsections must match exactly. -// -// Includes aren't supported yet; they will be supported in a later revision. -type Config struct { - entries []ConfigEntry -} - -// ConfigEntry represents a single parsed configuration directive. -type ConfigEntry struct { - // The section name in canonical lowercase form. - Section string - // The subsection name, retaining the exact form parsed from the input. - Subsection string - // The key name in canonical lowercase form. 
- Key string - // The interpreted value of the configuration entry, including unescaped - // characters where appropriate. - Value string -} - -// ParseConfig reads and parses Git configuration entries from r. -func ParseConfig(r io.Reader) (*Config, error) { - parser := &configParser{ - reader: bufio.NewReader(r), - lineNum: 1, - } - return parser.parse() -} - -// Get retrieves the first value for a given section, optional subsection, and key. -// Returns an empty string if not found. -func (c *Config) Get(section, subsection, key string) string { - section = strings.ToLower(section) - key = strings.ToLower(key) - for _, entry := range c.entries { - if strings.EqualFold(entry.Section, section) && - entry.Subsection == subsection && - strings.EqualFold(entry.Key, key) { - return entry.Value - } - } - return "" -} - -// GetAll retrieves all values for a given section, optional subsection, and key. -func (c *Config) GetAll(section, subsection, key string) []string { - section = strings.ToLower(section) - key = strings.ToLower(key) - var values []string - for _, entry := range c.entries { - if strings.EqualFold(entry.Section, section) && - entry.Subsection == subsection && - strings.EqualFold(entry.Key, key) { - values = append(values, entry.Value) - } - } - return values -} - -// Entries returns a copy of all parsed configuration entries in the order they -// appeared. Modifying the returned slice does not affect the Config. 
-func (c *Config) Entries() []ConfigEntry { - result := make([]ConfigEntry, len(c.entries)) - copy(result, c.entries) - return result -} - -type configParser struct { - reader *bufio.Reader - lineNum int - currentSection string - currentSubsec string - peeked rune - hasPeeked bool -} - -func (p *configParser) parse() (*Config, error) { - cfg := &Config{} - - if err := p.skipBOM(); err != nil { - return nil, err - } - - for { - ch, err := p.nextChar() - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - - // Skip whitespace and newlines - if ch == '\n' || unicode.IsSpace(ch) { - continue - } - - // Comments - if ch == '#' || ch == ';' { - if err := p.skipToEOL(); err != nil && err != io.EOF { - return nil, err - } - continue - } - - // Section header - if ch == '[' { - if err := p.parseSection(); err != nil { - return nil, fmt.Errorf("furgit: config: line %d: %w", p.lineNum, err) - } - continue - } - - // Key-value pair - if unicode.IsLetter(ch) { - p.unreadChar(ch) - if err := p.parseKeyValue(cfg); err != nil { - return nil, fmt.Errorf("furgit: config: line %d: %w", p.lineNum, err) - } - continue - } - - return nil, fmt.Errorf("furgit: config: line %d: unexpected character %q", p.lineNum, ch) - } - - return cfg, nil -} - -func (p *configParser) nextChar() (rune, error) { - if p.hasPeeked { - p.hasPeeked = false - return p.peeked, nil - } - - ch, _, err := p.reader.ReadRune() - if err != nil { - return 0, err - } - - if ch == '\r' { - next, _, err := p.reader.ReadRune() - if err == nil && next == '\n' { - ch = '\n' - } else if err == nil { - // Weird but ok - _ = p.reader.UnreadRune() - } - } - - if ch == '\n' { - p.lineNum++ - } - - return ch, nil -} - -func (p *configParser) unreadChar(ch rune) { - p.peeked = ch - p.hasPeeked = true - if ch == '\n' && p.lineNum > 1 { - p.lineNum-- - } -} - -func (p *configParser) skipBOM() error { - first, _, err := p.reader.ReadRune() - if err == io.EOF { - return nil - } - if err != nil { - return err - } - 
if first != '\uFEFF' { - _ = p.reader.UnreadRune() - } - return nil -} - -func (p *configParser) skipToEOL() error { - for { - ch, err := p.nextChar() - if err != nil { - return err - } - if ch == '\n' { - return nil - } - } -} - -func (p *configParser) parseSection() error { - var name bytes.Buffer - - for { - ch, err := p.nextChar() - if err != nil { - return errors.New("unexpected EOF in section header") - } - - if ch == ']' { - section := name.String() - if !isValidSection(section) { - return fmt.Errorf("invalid section name: %q", section) - } - p.currentSection = strings.ToLower(section) - p.currentSubsec = "" - return nil - } - - if unicode.IsSpace(ch) { - return p.parseExtendedSection(&name) - } - - if !isKeyChar(ch) && ch != '.' { - return fmt.Errorf("invalid character in section name: %q", ch) - } - - name.WriteRune(unicode.ToLower(ch)) - } -} - -func (p *configParser) parseExtendedSection(sectionName *bytes.Buffer) error { - for { - ch, err := p.nextChar() - if err != nil { - return errors.New("unexpected EOF in section header") - } - if !unicode.IsSpace(ch) { - if ch != '"' { - return errors.New("expected quote after section name") - } - break - } - } - - var subsec bytes.Buffer - for { - ch, err := p.nextChar() - if err != nil { - return errors.New("unexpected EOF in subsection") - } - - if ch == '\n' { - return errors.New("newline in subsection") - } - - if ch == '"' { - break - } - - if ch == '\\' { - next, err := p.nextChar() - if err != nil { - return errors.New("unexpected EOF after backslash in subsection") - } - if next == '\n' { - return errors.New("newline after backslash in subsection") - } - subsec.WriteRune(next) - } else { - subsec.WriteRune(ch) - } - } - - ch, err := p.nextChar() - if err != nil { - return errors.New("unexpected EOF after subsection") - } - if ch != ']' { - return fmt.Errorf("expected ']' after subsection, got %q", ch) - } - - section := sectionName.String() - if !isValidSection(section) { - return fmt.Errorf("invalid 
section name: %q", section) - } - - p.currentSection = strings.ToLower(section) - p.currentSubsec = subsec.String() - return nil -} - -func (p *configParser) parseKeyValue(cfg *Config) error { - if p.currentSection == "" { - return errors.New("key-value pair before any section header") - } - - var key bytes.Buffer - for { - ch, err := p.nextChar() - if err != nil { - return errors.New("unexpected EOF reading key") - } - - if ch == '=' || ch == '\n' || unicode.IsSpace(ch) { - p.unreadChar(ch) - break - } - - if !isKeyChar(ch) { - return fmt.Errorf("invalid character in key: %q", ch) - } - - key.WriteRune(unicode.ToLower(ch)) - } - - keyStr := key.String() - if len(keyStr) == 0 { - return errors.New("empty key name") - } - if !unicode.IsLetter(rune(keyStr[0])) { - return errors.New("key must start with a letter") - } - - for { - ch, err := p.nextChar() - if err == io.EOF { - cfg.entries = append(cfg.entries, ConfigEntry{ - Section: p.currentSection, - Subsection: p.currentSubsec, - Key: keyStr, - Value: "true", - }) - return nil - } - if err != nil { - return err - } - - if ch == '\n' { - cfg.entries = append(cfg.entries, ConfigEntry{ - Section: p.currentSection, - Subsection: p.currentSubsec, - Key: keyStr, - Value: "true", - }) - return nil - } - - if ch == '#' || ch == ';' { - if err := p.skipToEOL(); err != nil && err != io.EOF { - return err - } - cfg.entries = append(cfg.entries, ConfigEntry{ - Section: p.currentSection, - Subsection: p.currentSubsec, - Key: keyStr, - Value: "true", - }) - return nil - } - - if ch == '=' { - break - } - - if !unicode.IsSpace(ch) { - return fmt.Errorf("unexpected character after key: %q", ch) - } - } - - value, err := p.parseValue() - if err != nil { - return err - } - - cfg.entries = append(cfg.entries, ConfigEntry{ - Section: p.currentSection, - Subsection: p.currentSubsec, - Key: keyStr, - Value: value, - }) - - return nil -} - -func (p *configParser) parseValue() (string, error) { - var value bytes.Buffer - var inQuote bool 
- var inComment bool - trimLen := 0 - - for { - ch, err := p.nextChar() - if err == io.EOF { - if inQuote { - return "", errors.New("unexpected EOF in quoted value") - } - if trimLen > 0 { - return value.String()[:trimLen], nil - } - return value.String(), nil - } - if err != nil { - return "", err - } - - if ch == '\n' { - if inQuote { - return "", errors.New("newline in quoted value") - } - if trimLen > 0 { - return value.String()[:trimLen], nil - } - return value.String(), nil - } - - if inComment { - continue - } - - if unicode.IsSpace(ch) && !inQuote { - if trimLen == 0 && value.Len() > 0 { - trimLen = value.Len() - } - if value.Len() > 0 { - value.WriteRune(ch) - } - continue - } - - if !inQuote && (ch == '#' || ch == ';') { - inComment = true - continue - } - - if trimLen > 0 { - trimLen = 0 - } - - if ch == '\\' { - next, err := p.nextChar() - if err == io.EOF { - return "", errors.New("unexpected EOF after backslash") - } - if err != nil { - return "", err - } - - switch next { - case '\n': - continue - case 'n': - value.WriteRune('\n') - case 't': - value.WriteRune('\t') - case 'b': - value.WriteRune('\b') - case '\\', '"': - value.WriteRune(next) - default: - return "", fmt.Errorf("invalid escape sequence: \\%c", next) - } - continue - } - - if ch == '"' { - inQuote = !inQuote - continue - } - - value.WriteRune(ch) - } -} - -func isValidSection(s string) bool { - if len(s) == 0 { - return false - } - for _, ch := range s { - if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '-' && ch != '.' 
{ - return false - } - } - return true -} - -func isKeyChar(ch rune) bool { - return unicode.IsLetter(ch) || unicode.IsDigit(ch) || ch == '-' -} diff --git a/config/config_test.go b/config/config_test.go deleted file mode 100644 index 4296535f..00000000 --- a/config/config_test.go +++ /dev/null @@ -1,323 +0,0 @@ -package config - -import ( - "os" - "os/exec" - "path/filepath" - "strings" - "testing" -) - -func setupTestRepo(t *testing.T) (string, func()) { - t.Helper() - tempDir, err := os.MkdirTemp("", "furgit-config-test-*") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - cleanup := func() { - _ = os.RemoveAll(tempDir) - } - - cmd := exec.Command("git", "init", "--object-format=sha256", "--bare", tempDir) - cmd.Env = append(os.Environ(), "GIT_CONFIG_GLOBAL=/dev/null", "GIT_CONFIG_SYSTEM=/dev/null") - if output, err := cmd.CombinedOutput(); err != nil { - cleanup() - t.Fatalf("failed to init git repo: %v\n%s", err, output) - } - - return tempDir, cleanup -} - -func gitConfig(t *testing.T, dir string, args ...string) { - t.Helper() - fullArgs := append([]string{"config"}, args...) - cmd := exec.Command("git", fullArgs...) 
- cmd.Dir = dir - cmd.Env = append(os.Environ(), "GIT_CONFIG_GLOBAL=/dev/null", "GIT_CONFIG_SYSTEM=/dev/null") - if output, err := cmd.CombinedOutput(); err != nil { - t.Fatalf("git config %v failed: %v\n%s", args, err, output) - } -} - -func gitConfigGet(t *testing.T, dir, key string) string { - t.Helper() - cmd := exec.Command("git", "config", "--get", key) - cmd.Dir = dir - cmd.Env = append(os.Environ(), "GIT_CONFIG_GLOBAL=/dev/null", "GIT_CONFIG_SYSTEM=/dev/null") - output, err := cmd.CombinedOutput() - if err != nil { - return "" - } - return strings.TrimSpace(string(output)) -} - -func TestConfigAgainstGit(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitConfig(t, repoPath, "core.bare", "true") - gitConfig(t, repoPath, "core.filemode", "false") - gitConfig(t, repoPath, "user.name", "John Doe") - gitConfig(t, repoPath, "user.email", "john@example.com") - - cfgFile, err := os.Open(filepath.Join(repoPath, "config")) - if err != nil { - t.Fatalf("failed to open config: %v", err) - } - defer func() { _ = cfgFile.Close() }() - - cfg, err := ParseConfig(cfgFile) - if err != nil { - t.Fatalf("ParseConfig failed: %v", err) - } - - if got := cfg.Get("core", "", "bare"); got != "true" { - t.Errorf("core.bare: got %q, want %q", got, "true") - } - if got := cfg.Get("core", "", "filemode"); got != "false" { - t.Errorf("core.filemode: got %q, want %q", got, "false") - } - if got := cfg.Get("user", "", "name"); got != "John Doe" { - t.Errorf("user.name: got %q, want %q", got, "John Doe") - } - if got := cfg.Get("user", "", "email"); got != "john@example.com" { - t.Errorf("user.email: got %q, want %q", got, "john@example.com") - } -} - -func TestConfigSubsectionAgainstGit(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitConfig(t, repoPath, "remote.origin.url", "https://example.com/repo.git") - gitConfig(t, repoPath, "remote.origin.fetch", "+refs/heads/*:refs/remotes/origin/*") - - cfgFile, err := 
os.Open(filepath.Join(repoPath, "config")) - if err != nil { - t.Fatalf("failed to open config: %v", err) - } - defer func() { _ = cfgFile.Close() }() - - cfg, err := ParseConfig(cfgFile) - if err != nil { - t.Fatalf("ParseConfig failed: %v", err) - } - - if got := cfg.Get("remote", "origin", "url"); got != "https://example.com/repo.git" { - t.Errorf("remote.origin.url: got %q, want %q", got, "https://example.com/repo.git") - } - if got := cfg.Get("remote", "origin", "fetch"); got != "+refs/heads/*:refs/remotes/origin/*" { - t.Errorf("remote.origin.fetch: got %q, want %q", got, "+refs/heads/*:refs/remotes/origin/*") - } -} - -func TestConfigMultiValueAgainstGit(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitConfig(t, repoPath, "--add", "remote.origin.fetch", "+refs/heads/main:refs/remotes/origin/main") - gitConfig(t, repoPath, "--add", "remote.origin.fetch", "+refs/heads/dev:refs/remotes/origin/dev") - gitConfig(t, repoPath, "--add", "remote.origin.fetch", "+refs/tags/*:refs/tags/*") - - cfgFile, err := os.Open(filepath.Join(repoPath, "config")) - if err != nil { - t.Fatalf("failed to open config: %v", err) - } - defer func() { _ = cfgFile.Close() }() - - cfg, err := ParseConfig(cfgFile) - if err != nil { - t.Fatalf("ParseConfig failed: %v", err) - } - - fetches := cfg.GetAll("remote", "origin", "fetch") - if len(fetches) != 3 { - t.Fatalf("expected 3 fetch values, got %d", len(fetches)) - } - - expected := []string{ - "+refs/heads/main:refs/remotes/origin/main", - "+refs/heads/dev:refs/remotes/origin/dev", - "+refs/tags/*:refs/tags/*", - } - for i, want := range expected { - if fetches[i] != want { - t.Errorf("fetch[%d]: got %q, want %q", i, fetches[i], want) - } - } -} - -func TestConfigCaseInsensitiveAgainstGit(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitConfig(t, repoPath, "Core.Bare", "true") - gitConfig(t, repoPath, "CORE.FileMode", "false") - - gitVerifyBare := gitConfigGet(t, repoPath, 
"core.bare") - gitVerifyFilemode := gitConfigGet(t, repoPath, "core.filemode") - - cfgFile, err := os.Open(filepath.Join(repoPath, "config")) - if err != nil { - t.Fatalf("failed to open config: %v", err) - } - defer func() { _ = cfgFile.Close() }() - - cfg, err := ParseConfig(cfgFile) - if err != nil { - t.Fatalf("ParseConfig failed: %v", err) - } - - if got := cfg.Get("core", "", "bare"); got != gitVerifyBare { - t.Errorf("core.bare: got %q, want %q (from git)", got, gitVerifyBare) - } - if got := cfg.Get("CORE", "", "BARE"); got != gitVerifyBare { - t.Errorf("CORE.BARE: got %q, want %q (from git)", got, gitVerifyBare) - } - if got := cfg.Get("core", "", "filemode"); got != gitVerifyFilemode { - t.Errorf("core.filemode: got %q, want %q (from git)", got, gitVerifyFilemode) - } -} - -func TestConfigBooleanAgainstGit(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitConfig(t, repoPath, "test.flag1", "true") - gitConfig(t, repoPath, "test.flag2", "false") - gitConfig(t, repoPath, "test.flag3", "yes") - gitConfig(t, repoPath, "test.flag4", "no") - - cfgFile, err := os.Open(filepath.Join(repoPath, "config")) - if err != nil { - t.Fatalf("failed to open config: %v", err) - } - defer func() { _ = cfgFile.Close() }() - - cfg, err := ParseConfig(cfgFile) - if err != nil { - t.Fatalf("ParseConfig failed: %v", err) - } - - tests := []struct { - key string - want string - }{ - {"flag1", gitConfigGet(t, repoPath, "test.flag1")}, - {"flag2", gitConfigGet(t, repoPath, "test.flag2")}, - {"flag3", gitConfigGet(t, repoPath, "test.flag3")}, - {"flag4", gitConfigGet(t, repoPath, "test.flag4")}, - } - - for _, tt := range tests { - if got := cfg.Get("test", "", tt.key); got != tt.want { - t.Errorf("test.%s: got %q, want %q (from git)", tt.key, got, tt.want) - } - } -} - -func TestConfigComplexValuesAgainstGit(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitConfig(t, repoPath, "test.spaced", "value with spaces") - 
gitConfig(t, repoPath, "test.special", "value=with=equals") - gitConfig(t, repoPath, "test.path", "/path/to/something") - gitConfig(t, repoPath, "test.number", "12345") - - cfgFile, err := os.Open(filepath.Join(repoPath, "config")) - if err != nil { - t.Fatalf("failed to open config: %v", err) - } - defer func() { _ = cfgFile.Close() }() - - cfg, err := ParseConfig(cfgFile) - if err != nil { - t.Fatalf("ParseConfig failed: %v", err) - } - - tests := []string{"spaced", "special", "path", "number"} - for _, key := range tests { - want := gitConfigGet(t, repoPath, "test."+key) - if got := cfg.Get("test", "", key); got != want { - t.Errorf("test.%s: got %q, want %q (from git)", key, got, want) - } - } -} - -func TestConfigEntriesAgainstGit(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitConfig(t, repoPath, "core.bare", "true") - gitConfig(t, repoPath, "core.filemode", "false") - gitConfig(t, repoPath, "user.name", "Test User") - - cfgFile, err := os.Open(filepath.Join(repoPath, "config")) - if err != nil { - t.Fatalf("failed to open config: %v", err) - } - defer func() { _ = cfgFile.Close() }() - - cfg, err := ParseConfig(cfgFile) - if err != nil { - t.Fatalf("ParseConfig failed: %v", err) - } - - entries := cfg.Entries() - if len(entries) < 3 { - t.Errorf("expected at least 3 entries, got %d", len(entries)) - } - - found := make(map[string]bool) - for _, entry := range entries { - key := entry.Section + "." + entry.Key - if entry.Subsection != "" { - key = entry.Section + "." + entry.Subsection + "." 
+ entry.Key - } - found[key] = true - - gitValue := gitConfigGet(t, repoPath, key) - if entry.Value != gitValue { - t.Errorf("entry %s: got value %q, git has %q", key, entry.Value, gitValue) - } - } -} - -func TestConfigErrorCases(t *testing.T) { - tests := []struct { - name string - config string - }{ - { - name: "key before section", - config: "bare = true", - }, - { - name: "invalid section character", - config: "[core/invalid]", - }, - { - name: "unterminated section", - config: "[core", - }, - { - name: "unterminated quote", - config: "[core]\n\tbare = \"true", - }, - { - name: "invalid escape", - config: "[core]\n\tvalue = \"test\\x\"", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - r := strings.NewReader(tt.config) - _, err := ParseConfig(r) - if err == nil { - t.Errorf("expected error for %s", tt.name) - } - }) - } -} diff --git a/difflines/difflines.go b/difflines/difflines.go deleted file mode 100644 index db4c1a03..00000000 --- a/difflines/difflines.go +++ /dev/null @@ -1,223 +0,0 @@ -// Package difflines provides routines to perform line-based diffs. -package difflines - -import "bytes" - -// DiffLines performs a line-based diff. -// Lines are bytes up to and including '\n' (final line may lack '\n'). 
-func DiffLines(oldB, newB []byte) ([]LinesDiffChunk, error) { - type lineRef struct { - base []byte - start int - end int - } - - split := func(b []byte) []lineRef { - if len(b) == 0 { - return nil - } - var res []lineRef - start := 0 - for i := range b { - if b[i] == '\n' { - res = append(res, lineRef{base: b, start: start, end: i + 1}) - start = i + 1 - } - } - if start < len(b) { - res = append(res, lineRef{base: b, start: start, end: len(b)}) - } - return res - } - - oldLines := split(oldB) - newLines := split(newB) - - n := len(oldLines) - m := len(newLines) - if n == 0 && m == 0 { - return nil, nil - } - - idOf := make(map[string]int) - nextID := 0 - oldIDs := make([]int, n) - for i, ln := range oldLines { - key := bytesToString(ln.base[ln.start:ln.end]) - id, ok := idOf[key] - if !ok { - id = nextID - idOf[key] = id - nextID++ - } - oldIDs[i] = id - } - newIDs := make([]int, m) - for i, ln := range newLines { - key := bytesToString(ln.base[ln.start:ln.end]) - id, ok := idOf[key] - if !ok { - id = nextID - idOf[key] = id - nextID++ - } - newIDs[i] = id - } - - max := n + m - offset := max - trace := make([][]int, 0, max+1) - - Vprev := make([]int, 2*max+1) - for i := range Vprev { - Vprev[i] = -1 - } - - x0 := 0 - y0 := 0 - for x0 < n && y0 < m && oldIDs[x0] == newIDs[y0] { - x0++ - y0++ - } - Vprev[offset+0] = x0 - trace = append(trace, append([]int(nil), Vprev...)) - - found := x0 >= n && y0 >= m - - for D := 1; D <= max && !found; D++ { - V := make([]int, 2*max+1) - for i := range V { - V[i] = -1 - } - - for k := -D; k <= D; k += 2 { - var x int - if k == -D || (k != D && Vprev[offset+(k-1)] < Vprev[offset+(k+1)]) { - x = Vprev[offset+(k+1)] - } else { - x = Vprev[offset+(k-1)] + 1 - } - y := x - k - - for x < n && y < m && oldIDs[x] == newIDs[y] { - x++ - y++ - } - V[offset+k] = x - - if x >= n && y >= m { - trace = append(trace, V) - found = true - break - } - } - - if !found { - trace = append(trace, V) - Vprev = V - } - } - - type edit struct { - kind 
LinesDiffChunkKind - lineref lineRef - } - revEdits := make([]edit, 0, n+m) - - x := n - y := m - for D := len(trace) - 1; D >= 0; D-- { - k := x - y - - var ( - prevK int - prevX int - prevY int - ) - if D > 0 { - prevV := trace[D-1] - if k == -D || (k != D && prevV[offset+(k-1)] < prevV[offset+(k+1)]) { - prevK = k + 1 - } else { - prevK = k - 1 - } - prevX = prevV[offset+prevK] - prevY = prevX - prevK - } - - for x > prevX && y > prevY { - x-- - y-- - revEdits = append(revEdits, edit{kind: LinesDiffChunkKindUnchanged, lineref: oldLines[x]}) - } - - if D == 0 { - break - } - - if x == prevX { - y-- - revEdits = append(revEdits, edit{kind: LinesDiffChunkKindAdded, lineref: newLines[y]}) - } else { - x-- - revEdits = append(revEdits, edit{kind: LinesDiffChunkKindDeleted, lineref: oldLines[x]}) - } - } - - for i, j := 0, len(revEdits)-1; i < j; i, j = i+1, j-1 { - revEdits[i], revEdits[j] = revEdits[j], revEdits[i] - } - - var out []LinesDiffChunk - type meta struct { - base []byte - start int - end int - } - var metas []meta - - for _, e := range revEdits { - curBase := e.lineref.base - curStart := e.lineref.start - curEnd := e.lineref.end - - if len(out) == 0 || out[len(out)-1].Kind != e.kind { - out = append(out, LinesDiffChunk{Kind: e.kind, Data: curBase[curStart:curEnd]}) - metas = append(metas, meta{base: curBase, start: curStart, end: curEnd}) - continue - } - - lastIdx := len(out) - 1 - lastMeta := metas[lastIdx] - - if bytes.Equal(lastMeta.base, curBase) && lastMeta.end == curStart { - metas[lastIdx].end = curEnd - out[lastIdx].Data = curBase[metas[lastIdx].start:metas[lastIdx].end] - continue - } - - out[lastIdx].Data = append(out[lastIdx].Data, curBase[curStart:curEnd]...) - metas[lastIdx] = meta{base: nil, start: 0, end: 0} - } - - return out, nil -} - -// LinesDiffChunk represents a contiguous region of lines categorized -// as unchanged, deleted, or added. 
-type LinesDiffChunk struct { - Kind LinesDiffChunkKind - Data []byte -} - -// LinesDiffChunkKind enumerates the type of diff chunk. -type LinesDiffChunkKind int - -const ( - // LinesDiffChunkKindUnchanged represents an unchanged diff chunk. - LinesDiffChunkKindUnchanged LinesDiffChunkKind = iota - // LinesDiffChunkKindDeleted represents a deleted diff chunk. - LinesDiffChunkKindDeleted - // LinesDiffChunkKindAdded represents an added diff chunk. - LinesDiffChunkKindAdded -) diff --git a/difflines/difflines_test.go b/difflines/difflines_test.go deleted file mode 100644 index 783c2d6e..00000000 --- a/difflines/difflines_test.go +++ /dev/null @@ -1,326 +0,0 @@ -package difflines - -import ( - "bytes" - "strconv" - "strings" - "testing" -) - -func TestDiffLines(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - oldInput string - newInput string - expected []LinesDiffChunk - }{ - { - name: "empty inputs produce no chunks", - oldInput: "", - newInput: "", - expected: []LinesDiffChunk{}, - }, - { - name: "only additions", - oldInput: "", - newInput: "alpha\nbeta\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindAdded, Data: []byte("alpha\nbeta\n")}, - }, - }, - { - name: "only deletions", - oldInput: "alpha\nbeta\n", - newInput: "", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindDeleted, Data: []byte("alpha\nbeta\n")}, - }, - }, - { - name: "unchanged content is grouped", - oldInput: "same\nlines\n", - newInput: "same\nlines\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("same\nlines\n")}, - }, - }, - { - name: "insertion in the middle", - oldInput: "a\nb\nc\n", - newInput: "a\nb\nX\nc\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("a\nb\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("X\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("c\n")}, - }, - }, - { - name: "replacement without trailing newline", - oldInput: "first\nsecond", - 
newInput: "first\nsecond\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("first\n")}, - {Kind: LinesDiffChunkKindDeleted, Data: []byte("second")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("second\n")}, - }, - }, - { - name: "line replacement", - oldInput: "a\nb\nc\n", - newInput: "a\nB\nc\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("a\n")}, - {Kind: LinesDiffChunkKindDeleted, Data: []byte("b\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("B\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("c\n")}, - }, - }, - { - name: "swap adjacent lines", - oldInput: "A\nB\n", - newInput: "B\nA\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindDeleted, Data: []byte("A\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("B\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("A\n")}, - }, - }, - { - name: "indentation change is a full line replacement", - oldInput: "func main() {\n\treturn\n}\n", - newInput: "func main() {\n return\n}\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("func main() {\n")}, - {Kind: LinesDiffChunkKindDeleted, Data: []byte("\treturn\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte(" return\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("}\n")}, - }, - }, - { - name: "commenting out lines", - oldInput: "code\n", - newInput: "// code\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindDeleted, Data: []byte("code\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("// code\n")}, - }, - }, - { - name: "reducing repeating lines", - oldInput: "log\nlog\nlog\n", - newInput: "log\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("log\n")}, - {Kind: LinesDiffChunkKindDeleted, Data: []byte("log\nlog\n")}, - }, - }, - { - name: "expanding repeating lines", - oldInput: "tick\n", - newInput: "tick\ntick\ntick\n", - expected: []LinesDiffChunk{ - {Kind: 
LinesDiffChunkKindUnchanged, Data: []byte("tick\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("tick\ntick\n")}, - }, - }, - { - name: "interleaved modifications", - oldInput: "keep\nchange\nkeep\nchange\n", - newInput: "keep\nfixed\nkeep\nfixed\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("keep\n")}, - {Kind: LinesDiffChunkKindDeleted, Data: []byte("change\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("fixed\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("keep\n")}, - {Kind: LinesDiffChunkKindDeleted, Data: []byte("change\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("fixed\n")}, - }, - }, - { - name: "large common header and footer", - oldInput: "header\nheader\nheader\nOLD\nfooter\nfooter\n", - newInput: "header\nheader\nheader\nNEW\nfooter\nfooter\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("header\nheader\nheader\n")}, - {Kind: LinesDiffChunkKindDeleted, Data: []byte("OLD\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("NEW\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("footer\nfooter\n")}, - }, - }, - { - name: "completely different content", - oldInput: "apple\nbanana\n", - newInput: "cherry\ndate\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindDeleted, Data: []byte("apple\nbanana\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("cherry\ndate\n")}, - }, - }, - { - name: "unicode and emoji changes", - oldInput: "Hello 🌍\nYay\n", - newInput: "Hello 🌎\nYay\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindDeleted, Data: []byte("Hello 🌍\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("Hello 🌎\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("Yay\n")}, - }, - }, - { - name: "binary data with embedded newlines", - oldInput: "\x00\x01\n\x02\x03\n", - newInput: "\x00\x01\n\x02\xFF\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("\x00\x01\n")}, - {Kind: 
LinesDiffChunkKindDeleted, Data: []byte("\x02\x03\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("\x02\xFF\n")}, - }, - }, - { - name: "adding trailing newline to last line", - oldInput: "Line 1\nLine 2", - newInput: "Line 1\nLine 2\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("Line 1\n")}, - {Kind: LinesDiffChunkKindDeleted, Data: []byte("Line 2")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("Line 2\n")}, - }, - }, - { - name: "removing trailing newline", - oldInput: "A\nB\n", - newInput: "A\nB", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("A\n")}, - {Kind: LinesDiffChunkKindDeleted, Data: []byte("B\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("B")}, - }, - }, - { - name: "inserting blank lines", - oldInput: "A\nB\n", - newInput: "A\n\n\nB\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("A\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("\n\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("B\n")}, - }, - }, - { - name: "collapsing blank lines", - oldInput: "A\n\n\n\nB\n", - newInput: "A\nB\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("A\n")}, - {Kind: LinesDiffChunkKindDeleted, Data: []byte("\n\n\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("B\n")}, - }, - }, - { - name: "case sensitivity check", - oldInput: "FOO\nbar\n", - newInput: "foo\nbar\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindDeleted, Data: []byte("FOO\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("foo\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("bar\n")}, - }, - }, - { - name: "partial line match is full mismatch", - oldInput: "The quick brown fox\n", - newInput: "The quick brown fox jumps\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindDeleted, Data: []byte("The quick brown fox\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("The quick brown fox 
jumps\n")}, - }, - }, - { - name: "inserting middle content", - oldInput: "Top\nBottom\n", - newInput: "Top\nMiddle\nBottom\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("Top\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("Middle\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("Bottom\n")}, - }, - }, - { - name: "block move simulated", - oldInput: "BlockA\nBlockB\nBlockC\n", - newInput: "BlockA\nBlockC\nBlockB\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("BlockA\n")}, - {Kind: LinesDiffChunkKindDeleted, Data: []byte("BlockB\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("BlockC\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("BlockB\n")}, - }, - }, - { - name: "alternating additions", - oldInput: "A\nB\nC\n", - newInput: "A\n1\nB\n2\nC\n", - expected: []LinesDiffChunk{ - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("A\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("1\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("B\n")}, - {Kind: LinesDiffChunkKindAdded, Data: []byte("2\n")}, - {Kind: LinesDiffChunkKindUnchanged, Data: []byte("C\n")}, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - chunks, err := DiffLines([]byte(tt.oldInput), []byte(tt.newInput)) - if err != nil { - t.Fatalf("DiffLines returned error: %v", err) - } - - if len(chunks) != len(tt.expected) { - t.Fatalf("expected %d chunks, got %d: %s", len(tt.expected), len(chunks), formatChunks(chunks)) - } - - for i := range tt.expected { - if chunks[i].Kind != tt.expected[i].Kind { - t.Fatalf("chunk %d kind mismatch: got %v, want %v; chunks: %s", i, chunks[i].Kind, tt.expected[i].Kind, formatChunks(chunks)) - } - if !bytes.Equal(chunks[i].Data, tt.expected[i].Data) { - t.Fatalf("chunk %d data mismatch: got %q, want %q; chunks: %s", i, string(chunks[i].Data), string(tt.expected[i].Data), formatChunks(chunks)) - } - } - }) - } -} - 
-func formatChunks(chunks []LinesDiffChunk) string { - var b strings.Builder - b.WriteByte('[') - for i, chunk := range chunks { - if i > 0 { - b.WriteString(", ") - } - b.WriteString(chunkKindName(chunk.Kind)) - b.WriteByte(':') - b.WriteString(strconv.Quote(string(chunk.Data))) - } - b.WriteByte(']') - return b.String() -} - -func chunkKindName(kind LinesDiffChunkKind) string { - switch kind { - case LinesDiffChunkKindUnchanged: - return "U" - case LinesDiffChunkKindDeleted: - return "D" - case LinesDiffChunkKindAdded: - return "A" - default: - return "?" - } -} diff --git a/difflines/unsafe.go b/difflines/unsafe.go deleted file mode 100644 index 6e7ac5fd..00000000 --- a/difflines/unsafe.go +++ /dev/null @@ -1,17 +0,0 @@ -package difflines - -import "unsafe" - -// // stringToBytes converts a string to a byte slice without copying the string. -// // Memory is borrowed from the string. -// // The resulting byte slice must not be modified in any form. -// func stringToBytes(s string) (bytes []byte) { -// return unsafe.Slice(unsafe.StringData(s), len(s)) //#nosec G103 -// } - -// bytesToString converts a byte slice to a string without copying the bytes. -// Memory is borrowed from the byte slice. -// The source byte slice must not be modified. -func bytesToString(b []byte) string { - return unsafe.String(unsafe.SliceData(b), len(b)) //#nosec G103 -} diff --git a/difftrees.go b/difftrees.go deleted file mode 100644 index ea21687c..00000000 --- a/difftrees.go +++ /dev/null @@ -1,207 +0,0 @@ -package furgit - -// TreeDiffEntryKind represents the type of difference between two tree entries. -type TreeDiffEntryKind int - -const ( - // TreeDiffEntryKindInvalid indicates an invalid difference type. - TreeDiffEntryKindInvalid TreeDiffEntryKind = iota - // TreeDiffEntryKindDeleted indicates that the entry was deleted. - TreeDiffEntryKindDeleted - // TreeDiffEntryKindAdded indicates that the entry was added. 
- TreeDiffEntryKindAdded - // TreeDiffEntryKindModified indicates that the entry was modified. - TreeDiffEntryKindModified -) - -// TreeDiffEntry represents a difference between two tree entries. -type TreeDiffEntry struct { - // Path is the full slash-separated path relative to the root - // of the repository. - Path []byte - // Kind indicates the type of difference. - Kind TreeDiffEntryKind - // Old is the old tree entry (nil iff added). - Old *TreeEntry - // New is the new tree entry (nil iff deleted). - New *TreeEntry -} - -// DiffTrees compares two trees rooted at a and b and returns all differences -// as a flat slice of TreeDiffEntry. Differences are discovered recursively. -func (repo *Repository) DiffTrees(a, b *StoredTree) ([]TreeDiffEntry, error) { - var out []TreeDiffEntry - err := repo.diffTreesRecursive(a, b, nil, &out) - return out, err -} - -func (repo *Repository) diffTreesRecursive(a, b *StoredTree, prefix []byte, out *[]TreeDiffEntry) error { - if a == nil && b == nil { - return nil - } - - if a == nil { - for i := range b.Entries { - entry := &b.Entries[i] - full := joinPath(prefix, entry.Name) - - *out = append(*out, TreeDiffEntry{ - Path: full, - Kind: TreeDiffEntryKindAdded, - Old: nil, - New: entry, - }) - - if entry.Mode == FileModeDir { - sub, err := repo.readTree(entry.ID) - if err != nil { - return err - } - if err := repo.diffTreesRecursive(nil, sub, full, out); err != nil { - return err - } - } - } - return nil - } - if b == nil { - for i := range a.Entries { - entry := &a.Entries[i] - full := joinPath(prefix, entry.Name) - - *out = append(*out, TreeDiffEntry{ - Path: full, - Kind: TreeDiffEntryKindDeleted, - Old: entry, - New: nil, - }) - - if entry.Mode == FileModeDir { - sub, err := repo.readTree(entry.ID) - if err != nil { - return err - } - if err := repo.diffTreesRecursive(sub, nil, full, out); err != nil { - return err - } - } - } - return nil - } - - left := make(map[string]*TreeEntry, len(a.Entries)) - for i := range a.Entries 
{ - e := &a.Entries[i] - left[string(e.Name)] = e - } - right := make(map[string]*TreeEntry, len(b.Entries)) - for i := range b.Entries { - e := &b.Entries[i] - right[string(e.Name)] = e - } - - seen := make(map[string]bool, len(a.Entries)+len(b.Entries)) - for n := range left { - seen[n] = true - } - for n := range right { - seen[n] = true - } - - for name := range seen { - le := left[name] - re := right[name] - - full := joinPath(prefix, []byte(name)) - - switch { - case le == nil && re != nil: - *out = append(*out, TreeDiffEntry{ - Path: full, - Kind: TreeDiffEntryKindAdded, - Old: nil, - New: re, - }) - - if re.Mode == FileModeDir { - sub, err := repo.readTree(re.ID) - if err != nil { - return err - } - if err := repo.diffTreesRecursive(nil, sub, full, out); err != nil { - return err - } - } - - case le != nil && re == nil: - *out = append(*out, TreeDiffEntry{ - Path: full, - Kind: TreeDiffEntryKindDeleted, - Old: le, - New: nil, - }) - - if le.Mode == FileModeDir { - sub, err := repo.readTree(le.ID) - if err != nil { - return err - } - if err := repo.diffTreesRecursive(sub, nil, full, out); err != nil { - return err - } - } - - default: - modified := (le.Mode != re.Mode) || (le.ID != re.ID) - if modified { - *out = append(*out, TreeDiffEntry{ - Path: full, - Kind: TreeDiffEntryKindModified, - Old: le, - New: re, - }) - } - - if le.Mode == FileModeDir && re.Mode == FileModeDir && le.ID != re.ID { - ls, err := repo.readTree(le.ID) - if err != nil { - return err - } - rs, err := repo.readTree(re.ID) - if err != nil { - return err - } - if err := repo.diffTreesRecursive(ls, rs, full, out); err != nil { - return err - } - } - } - } - - return nil -} - -func joinPath(prefix, name []byte) []byte { - if len(prefix) == 0 { - out := make([]byte, len(name)) - copy(out, name) - return out - } - out := make([]byte, len(prefix)+1+len(name)) - copy(out, prefix) - out[len(prefix)] = '/' - copy(out[len(prefix)+1:], name) - return out -} - -func (repo *Repository) readTree(id 
Hash) (*StoredTree, error) { - obj, err := repo.ReadObject(id) - if err != nil { - return nil, err - } - tree, ok := obj.(*StoredTree) - if !ok { - return nil, ErrInvalidObject - } - return tree, nil -} diff --git a/difftrees_test.go b/difftrees_test.go deleted file mode 100644 index b8b89bb0..00000000 --- a/difftrees_test.go +++ /dev/null @@ -1,223 +0,0 @@ -package furgit - -import ( - "os" - "path/filepath" - "testing" -) - -func TestDiffTreesComplexNestedChanges(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - writeTestFile(t, filepath.Join(workDir, "README.md"), "initial readme\n") - writeTestFile(t, filepath.Join(workDir, "unchanged.txt"), "leave me as-is\n") - writeTestFile(t, filepath.Join(workDir, "dir", "file_a.txt"), "alpha v1\n") - writeTestFile(t, filepath.Join(workDir, "dir", "nested", "file_b.txt"), "beta v1\n") - writeTestFile(t, filepath.Join(workDir, "dir", "nested", "deeper", "file_c.txt"), "gamma v1\n") - writeTestFile(t, filepath.Join(workDir, "dir", "nested", "deeper", "old.txt"), "old branch\n") - writeTestFile(t, filepath.Join(workDir, "treeB", "legacy.txt"), "legacy root\n") - writeTestFile(t, filepath.Join(workDir, "treeB", "sub", "retired.txt"), "retired\n") - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - baseTreeHash := gitCmd(t, repoPath, "--work-tree="+workDir, "write-tree") - - writeTestFile(t, filepath.Join(workDir, "README.md"), "updated readme\n") - gitCmd(t, repoPath, "--work-tree="+workDir, "rm", "-f", "dir/file_a.txt") - writeTestFile(t, filepath.Join(workDir, "dir", "nested", "file_b.txt"), "beta v2\n") - gitCmd(t, repoPath, "--work-tree="+workDir, "rm", "-f", "dir/nested/deeper/old.txt") - writeTestFile(t, filepath.Join(workDir, "dir", "nested", "deeper", "new.txt"), "new branch entry\n") - writeTestFile(t, filepath.Join(workDir, "dir", "nested", "deeper", "branch", "info.md"), "branch info\n") - writeTestFile(t, 
filepath.Join(workDir, "dir", "nested", "deeper", "branch", "subbranch", "leaf.txt"), "leaf data\n") - writeTestFile(t, filepath.Join(workDir, "dir", "nested", "deeper", "branch", "subbranch", "deep", "final.txt"), "final artifact\n") - writeTestFile(t, filepath.Join(workDir, "dir", "newchild.txt"), "brand new sibling\n") - gitCmd(t, repoPath, "--work-tree="+workDir, "rm", "-r", "-f", "treeB") - writeTestFile(t, filepath.Join(workDir, "features", "alpha", "README.md"), "alpha docs\n") - writeTestFile(t, filepath.Join(workDir, "features", "alpha", "beta", "gamma.txt"), "gamma payload\n") - writeTestFile(t, filepath.Join(workDir, "modules", "v2", "core", "main.go"), "package core\n") - writeTestFile(t, filepath.Join(workDir, "root_addition.txt"), "root level file\n") - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - updatedTreeHash := gitCmd(t, repoPath, "--work-tree="+workDir, "write-tree") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - baseTree := readStoredTree(t, repo, baseTreeHash) - updatedTree := readStoredTree(t, repo, updatedTreeHash) - - diffs, err := repo.DiffTrees(baseTree, updatedTree) - if err != nil { - t.Fatalf("DiffTrees failed: %v", err) - } - - expected := map[string]diffExpectation{ - "README.md": {kind: TreeDiffEntryKindModified}, - "dir": {kind: TreeDiffEntryKindModified}, - "dir/file_a.txt": {kind: TreeDiffEntryKindDeleted, newNil: true}, - "dir/newchild.txt": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "dir/nested": {kind: TreeDiffEntryKindModified}, - "dir/nested/file_b.txt": {kind: TreeDiffEntryKindModified}, - "dir/nested/deeper": {kind: TreeDiffEntryKindModified}, - "dir/nested/deeper/old.txt": {kind: TreeDiffEntryKindDeleted, newNil: true}, - "dir/nested/deeper/new.txt": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "dir/nested/deeper/branch": {kind: TreeDiffEntryKindAdded, oldNil: true}, - 
"dir/nested/deeper/branch/info.md": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "dir/nested/deeper/branch/subbranch": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "dir/nested/deeper/branch/subbranch/leaf.txt": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "dir/nested/deeper/branch/subbranch/deep": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "dir/nested/deeper/branch/subbranch/deep/final.txt": { - kind: TreeDiffEntryKindAdded, - oldNil: true, - }, - "features": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "features/alpha": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "features/alpha/README.md": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "features/alpha/beta": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "features/alpha/beta/gamma.txt": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "modules": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "modules/v2": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "modules/v2/core": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "modules/v2/core/main.go": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "root_addition.txt": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "treeB": {kind: TreeDiffEntryKindDeleted, newNil: true}, - "treeB/legacy.txt": {kind: TreeDiffEntryKindDeleted, newNil: true}, - "treeB/sub": {kind: TreeDiffEntryKindDeleted, newNil: true}, - "treeB/sub/retired.txt": {kind: TreeDiffEntryKindDeleted, newNil: true}, - } - - checkDiffs(t, diffs, expected) -} - -func TestDiffTreesDirectoryAddDeleteDeep(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - writeTestFile(t, filepath.Join(workDir, "old_dir", "old.txt"), "stale directory\n") - writeTestFile(t, filepath.Join(workDir, "old_dir", "sub1", "legacy.txt"), "legacy path\n") - writeTestFile(t, filepath.Join(workDir, "old_dir", "sub1", "nested", "end.txt"), "legacy end\n") - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - 
originalTreeHash := gitCmd(t, repoPath, "--work-tree="+workDir, "write-tree") - - gitCmd(t, repoPath, "--work-tree="+workDir, "rm", "-r", "-f", "old_dir") - writeTestFile(t, filepath.Join(workDir, "fresh", "alpha", "beta", "new.txt"), "brand new directory\n") - writeTestFile(t, filepath.Join(workDir, "fresh", "alpha", "docs", "note.md"), "docs note\n") - writeTestFile(t, filepath.Join(workDir, "fresh", "alpha", "beta", "gamma", "delta.txt"), "delta payload\n") - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - nextTreeHash := gitCmd(t, repoPath, "--work-tree="+workDir, "write-tree") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - originalTree := readStoredTree(t, repo, originalTreeHash) - nextTree := readStoredTree(t, repo, nextTreeHash) - - diffs, err := repo.DiffTrees(originalTree, nextTree) - if err != nil { - t.Fatalf("DiffTrees failed: %v", err) - } - - expected := map[string]diffExpectation{ - "fresh": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "fresh/alpha": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "fresh/alpha/beta": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "fresh/alpha/beta/new.txt": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "fresh/alpha/beta/gamma": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "fresh/alpha/beta/gamma/delta.txt": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "fresh/alpha/docs": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "fresh/alpha/docs/note.md": {kind: TreeDiffEntryKindAdded, oldNil: true}, - "old_dir": {kind: TreeDiffEntryKindDeleted, newNil: true}, - "old_dir/old.txt": {kind: TreeDiffEntryKindDeleted, newNil: true}, - "old_dir/sub1": {kind: TreeDiffEntryKindDeleted, newNil: true}, - "old_dir/sub1/legacy.txt": {kind: TreeDiffEntryKindDeleted, newNil: true}, - "old_dir/sub1/nested": {kind: TreeDiffEntryKindDeleted, newNil: true}, - "old_dir/sub1/nested/end.txt": {kind: 
TreeDiffEntryKindDeleted, newNil: true}, - } - - checkDiffs(t, diffs, expected) -} - -type diffExpectation struct { - kind TreeDiffEntryKind - oldNil bool - newNil bool -} - -func writeTestFile(t *testing.T, path string, data string) { - t.Helper() - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - t.Fatalf("failed to create directory for %s: %v", path, err) - } - if err := os.WriteFile(path, []byte(data), 0o644); err != nil { - t.Fatalf("failed to write %s: %v", path, err) - } -} - -func readStoredTree(t *testing.T, repo *Repository, hashStr string) *StoredTree { - t.Helper() - hash, err := repo.ParseHash(hashStr) - if err != nil { - t.Fatalf("ParseHash failed: %v", err) - } - obj, err := repo.ReadObject(hash) - if err != nil { - t.Fatalf("ReadObject failed: %v", err) - } - tree, ok := obj.(*StoredTree) - if !ok { - t.Fatalf("expected *StoredTree, got %T", obj) - } - return tree -} - -func checkDiffs(t *testing.T, diffs []TreeDiffEntry, expected map[string]diffExpectation) { - t.Helper() - got := make(map[string]TreeDiffEntry, len(diffs)) - for _, diff := range diffs { - key := string(diff.Path) - if _, exists := got[key]; exists { - t.Fatalf("duplicate diff entry for %q", key) - } - got[key] = diff - } - if len(got) != len(expected) { - t.Fatalf("unexpected diff count: got %d, want %d", len(got), len(expected)) - } - - for path, want := range expected { - diff, ok := got[path] - if !ok { - t.Fatalf("missing diff for %q", path) - } - if diff.Kind != want.kind { - t.Errorf("%s kind: got %v, want %v", path, diff.Kind, want.kind) - } - if (diff.Old == nil) != want.oldNil { - t.Errorf("%s old nil mismatch: got %v, want %v", path, diff.Old == nil, want.oldNil) - } - if (diff.New == nil) != want.newNil { - t.Errorf("%s new nil mismatch: got %v, want %v", path, diff.New == nil, want.newNil) - } - if diff.Kind == TreeDiffEntryKindModified && diff.Old != nil && diff.New != nil && diff.Old.ID == diff.New.ID { - t.Errorf("%s: modified entry should change 
IDs", path) - } - } -} diff --git a/errors.go b/errors.go deleted file mode 100644 index 675586f5..00000000 --- a/errors.go +++ /dev/null @@ -1,12 +0,0 @@ -package furgit - -import "errors" - -var ( - // ErrInvalidObject indicates malformed serialized data. - ErrInvalidObject = errors.New("furgit: invalid object encoding") - // ErrInvalidRef indicates malformed refs. - ErrInvalidRef = errors.New("furgit: invalid ref") - // ErrNotFound indicates missing refs/objects. - ErrNotFound = errors.New("furgit: not found") -) diff --git a/errors_test.go b/errors_test.go deleted file mode 100644 index 29803f97..00000000 --- a/errors_test.go +++ /dev/null @@ -1,17 +0,0 @@ -package furgit - -import ( - "testing" -) - -func TestErrors(t *testing.T) { - if ErrInvalidObject == nil { - t.Error("ErrInvalidObject should not be nil") - } - if ErrInvalidRef == nil { - t.Error("ErrInvalidRef should not be nil") - } - if ErrNotFound == nil { - t.Error("ErrNotFound should not be nil") - } -} diff --git a/git.go b/git.go deleted file mode 100644 index 3a0713f4..00000000 --- a/git.go +++ /dev/null @@ -1,2 +0,0 @@ -// Package furgit implements low-level Git operations. 
-package furgit diff --git a/go.mod b/go.mod deleted file mode 100644 index dacfe175..00000000 --- a/go.mod +++ /dev/null @@ -1,7 +0,0 @@ -module codeberg.org/lindenii/furgit - -go 1.25 - -require golang.org/x/sys v0.40.0 - -require github.com/cespare/xxhash/v2 v2.3.0 diff --git a/go.sum b/go.sum deleted file mode 100644 index 24cf0305..00000000 --- a/go.sum +++ /dev/null @@ -1,4 +0,0 @@ -github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= -github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= -golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= diff --git a/hash.go b/hash.go deleted file mode 100644 index 81b269c3..00000000 --- a/hash.go +++ /dev/null @@ -1,138 +0,0 @@ -package furgit - -import ( - "crypto/sha1" - "crypto/sha256" - "encoding/hex" - "hash" -) - -// maxHashSize MUST be >= the largest supported algorithm size. -const maxHashSize = sha256.Size - -// hashAlgorithm identifies the hash algorithm used for Git object IDs. 
-type hashAlgorithm uint8 - -const ( - hashAlgoUnknown hashAlgorithm = iota - hashAlgoSHA1 - hashAlgoSHA256 -) - -type hashAlgorithmDetails struct { - name string - size int - sum func([]byte) Hash - new func() hash.Hash -} - -var hashAlgorithmTable = [...]hashAlgorithmDetails{ - hashAlgoUnknown: {}, - hashAlgoSHA1: { - name: "sha1", - size: sha1.Size, - sum: func(data []byte) Hash { - sum := sha1.Sum(data) - var h Hash - copy(h.data[:], sum[:]) - h.algo = hashAlgoSHA1 - return h - }, - new: func() hash.Hash { - return sha1.New() - }, - }, - hashAlgoSHA256: { - name: "sha256", - size: sha256.Size, - sum: func(data []byte) Hash { - sum := sha256.Sum256(data) - var h Hash - copy(h.data[:], sum[:]) - h.algo = hashAlgoSHA256 - return h - }, - new: func() hash.Hash { - return sha256.New() - }, - }, -} - -func (algo hashAlgorithm) info() hashAlgorithmDetails { - return hashAlgorithmTable[algo] -} - -// Size returns the hash size in bytes. -func (algo hashAlgorithm) Size() int { - return algo.info().size -} - -// String returns the canonical name of the hash algorithm. -func (algo hashAlgorithm) String() string { - inf := algo.info() - if inf.name == "" { - return "unknown" - } - return inf.name -} - -func (algo hashAlgorithm) HexLen() int { - return algo.Size() * 2 -} - -func (algo hashAlgorithm) Sum(data []byte) Hash { - return algo.info().sum(data) -} - -func (algo hashAlgorithm) New() (hash.Hash, error) { - newFn := algo.info().new - if newFn == nil { - return nil, ErrInvalidObject - } - return newFn(), nil -} - -// Hash represents a Git object ID. -type Hash struct { - algo hashAlgorithm - data [maxHashSize]byte -} - -// String returns a hexadecimal string representation of the hash. -func (hash Hash) String() string { - size := hash.algo.Size() - if size == 0 { - return "" - } - return hex.EncodeToString(hash.data[:size]) -} - -// Bytes returns a copy of the hash's bytes. 
-func (hash Hash) Bytes() []byte { - size := hash.algo.Size() - if size == 0 { - return nil - } - return append([]byte(nil), hash.data[:size]...) -} - -// Size returns the hash size. -func (hash Hash) Size() int { - return hash.algo.Size() -} - -var algoByName = map[string]hashAlgorithm{} - -func init() { - for algo, info := range hashAlgorithmTable { - if info.name == "" { - continue - } - algoByName[info.name] = hashAlgorithm(algo) - } -} - -func parseHashAlgorithm(s string) (hashAlgorithm, bool) { - algo, ok := algoByName[s] - return algo, ok -} diff --git a/hash_test.go b/hash_test.go deleted file mode 100644 index 0b15fd38..00000000 --- a/hash_test.go +++ /dev/null @@ -1,75 +0,0 @@ -package furgit - -import ( - "testing" -) - -func TestHashParse(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - var validHash string - var expectedSize int - if repo.hashAlgo.Size() == 32 { - validHash = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" - expectedSize = 32 - } else { - validHash = "0123456789abcdef0123456789abcdef01234567" - expectedSize = 20 - } - - hash, err := repo.ParseHash(validHash) - if err != nil { - t.Fatalf("ParseHash failed: %v", err) - } - if hash.String() != validHash { - t.Errorf("String(): got %q, want %q", hash.String(), validHash) - } - if hash.Size() != expectedSize { - t.Errorf("Size(): got %d, want %d", hash.Size(), expectedSize) - } - - hashBytes := hash.Bytes() - if len(hashBytes) != expectedSize { - t.Errorf("Bytes() length: got %d, want %d", len(hashBytes), expectedSize) - } -} - -func TestHashParseErrors(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - tests := []struct 
{ - name string - hash string - }{ - {"invalid chars", "invalid"}, - {"wrong length", "0123456789abcdef"}, - {"non-hex", "0123456789abcdefg123456789abcdef0123456789abcdef0123456789abcdef"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - _, err := repo.ParseHash(tt.hash) - if err == nil { - t.Errorf("expected error for %s", tt.name) - } - }) - } -} diff --git a/headers.go b/headers.go deleted file mode 100644 index 5a8b46eb..00000000 --- a/headers.go +++ /dev/null @@ -1,9 +0,0 @@ -package furgit - -// ExtraHeader represents an extra header in a Git object. -type ExtraHeader struct { - // Key represents the header key. - Key string - // Value represents the header value. - Value []byte -} diff --git a/hybrid_test.go b/hybrid_test.go deleted file mode 100644 index 083605d8..00000000 --- a/hybrid_test.go +++ /dev/null @@ -1,271 +0,0 @@ -package furgit - -import ( - "bytes" - "fmt" - "os" - "path/filepath" - "testing" -) - -func TestTreeNestedDeep(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - depth := 50 - currentDir := workDir - for i := 0; i < depth; i++ { - currentDir = filepath.Join(currentDir, fmt.Sprintf("level%d", i)) - err := os.MkdirAll(currentDir, 0o755) - if err != nil { - t.Fatalf("failed to create directory %s: %v", currentDir, err) - } - } - err := os.WriteFile(filepath.Join(currentDir, "deep.txt"), []byte("deep content"), 0o644) - if err != nil { - t.Fatalf("failed to create deep.txt: %v", err) - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - treeHash := gitCmd(t, repoPath, "--work-tree="+workDir, "write-tree") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - hash, _ := repo.ParseHash(treeHash) - obj, _ := repo.ReadObject(hash) - tree := obj.(*StoredTree) - - path := make([][]byte, depth+1) - for i := 0; i < 
depth; i++ { - path[i] = []byte(fmt.Sprintf("level%d", i)) - } - path[depth] = []byte("deep.txt") - - entry, err := tree.EntryRecursive(repo, path) - if err != nil { - t.Fatalf("EntryRecursive failed for deep path: %v", err) - } - - blobObj, _ := repo.ReadObject(entry.ID) - blob := blobObj.(*StoredBlob) - - if !bytes.Equal(blob.Data, []byte("deep content")) { - t.Errorf("deep file content: got %q, want %q", blob.Data, "deep content") - } -} - -func TestTreeMixedModes(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "normal.txt"), []byte("normal"), 0o644) - if err != nil { - t.Fatalf("failed to create normal.txt: %v", err) - } - err = os.WriteFile(filepath.Join(workDir, "executable.sh"), []byte("#!/bin/sh\necho test"), 0o755) - if err != nil { - t.Fatalf("failed to create executable.sh: %v", err) - } - err = os.Symlink("normal.txt", filepath.Join(workDir, "link.txt")) - if err != nil { - t.Fatalf("failed to create symlink: %v", err) - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - treeHash := gitCmd(t, repoPath, "--work-tree="+workDir, "write-tree") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - hash, _ := repo.ParseHash(treeHash) - obj, _ := repo.ReadObject(hash) - tree := obj.(*StoredTree) - - modes := make(map[string]FileMode) - for _, entry := range tree.Entries { - modes[string(entry.Name)] = entry.Mode - } - - if modes["normal.txt"] != 0o100644 { - t.Errorf("normal.txt mode: got %o, want %o", modes["normal.txt"], 0o100644) - } - if modes["executable.sh"] != 0o100755 { - t.Errorf("executable.sh mode: got %o, want %o", modes["executable.sh"], 0o100755) - } - if modes["link.txt"] != 0o120000 { - t.Errorf("link.txt mode: got %o, want %o", modes["link.txt"], 0o120000) - } -} - -func TestCommitChain(t 
*testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - numCommits := 100 - var commits []string - - for i := 0; i < numCommits; i++ { - filename := filepath.Join(workDir, fmt.Sprintf("file%d.txt", i)) - err := os.WriteFile(filename, []byte(fmt.Sprintf("content %d", i)), 0o644) - if err != nil { - t.Fatalf("failed to create %s: %v", filename, err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("Commit %d", i)) - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - commits = append(commits, commitHash) - } - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - hash, _ := repo.ParseHash(commits[len(commits)-1]) - for i := numCommits - 1; i >= 0; i-- { - obj, err := repo.ReadObject(hash) - if err != nil { - t.Fatalf("failed to read commit %d: %v", i, err) - } - - commit, ok := obj.(*StoredCommit) - if !ok { - t.Fatalf("expected *StoredCommit at %d, got %T", i, obj) - } - - expectedMsg := fmt.Sprintf("Commit %d\n", i) - if !bytes.Equal(commit.Message, []byte(expectedMsg)) { - t.Errorf("commit %d message: got %q, want %q", i, commit.Message, expectedMsg) - } - - if i > 0 { - if len(commit.Parents) != 1 { - t.Fatalf("commit %d should have 1 parent, got %d", i, len(commit.Parents)) - } - hash = commit.Parents[0] - } else { - if len(commit.Parents) != 0 { - t.Errorf("first commit should have 0 parents, got %d", len(commit.Parents)) - } - } - } -} - -func TestMultipleTags(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "file.txt"), []byte("content"), 0o644) - if err != nil { - t.Fatalf("failed to create file.txt: %v", err) - } - gitCmd(t, repoPath, 
"--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Tagged commit") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - tags := []string{"v1.0.0", "v1.0.1", "v1.1.0", "v2.0.0"} - for _, tagName := range tags { - gitCmd(t, repoPath, "tag", "-a", "-m", fmt.Sprintf("Release %s", tagName), tagName, commitHash) - } - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - for _, tagName := range tags { - tagHash := gitCmd(t, repoPath, "rev-parse", tagName) - hash, _ := repo.ParseHash(tagHash) - obj, err := repo.ReadObject(hash) - if err != nil { - t.Errorf("failed to read tag %s: %v", tagName, err) - continue - } - - tag, ok := obj.(*StoredTag) - if !ok { - t.Errorf("tag %s: expected *StoredTag, got %T", tagName, obj) - continue - } - - if !bytes.Equal(tag.Name, []byte(tagName)) { - t.Errorf("tag name: got %q, want %q", tag.Name, tagName) - } - } -} - -func TestPackfileAfterMultipleRepacks(t *testing.T) { - if testing.Short() { - t.Skip("skipping multiple repack test in short mode") - } - - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitCmd(t, repoPath, "config", "gc.auto", "0") - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - for i := 0; i < 5; i++ { - err := os.WriteFile(filepath.Join(workDir, fmt.Sprintf("file%d.txt", i)), []byte(fmt.Sprintf("content %d", i)), 0o644) - if err != nil { - t.Fatalf("failed to create file%d.txt: %v", i, err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("Commit %d", i)) - gitCmd(t, repoPath, "repack", "-d") - } - - gitCmd(t, repoPath, "repack", "-a", "-d") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - headHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - 
hash, _ := repo.ParseHash(headHash) - - obj, err := repo.ReadObject(hash) - if err != nil { - t.Fatalf("failed to read HEAD from final packfile: %v", err) - } - - commit := obj.(*StoredCommit) - if !bytes.Contains(commit.Message, []byte("Commit 4")) { - t.Errorf("HEAD commit message incorrect: got %q", commit.Message) - } -} diff --git a/ident.go b/ident.go deleted file mode 100644 index 50676a45..00000000 --- a/ident.go +++ /dev/null @@ -1,127 +0,0 @@ -package furgit - -import ( - "bytes" - "errors" - "fmt" - "math" - "strconv" - "strings" - "time" -) - -// Ident represents a Git identity (author/committer/tagger). -type Ident struct { - // Name represents the person's name. - Name []byte - // Email represents the person's email. - Email []byte - // WhenUnix represents the timestamp as a Unix time. - // This value is in UTC. - WhenUnix int64 - // The timezone offset in minutes. - OffsetMinutes int32 -} - -// parseIdent parses an identity line from the canonical Git format: -// "Name <email> 123456789 +0000". -func parseIdent(line []byte) (*Ident, error) { - lt := bytes.IndexByte(line, '<') - if lt < 0 { - return nil, errors.New("furgit: ident: missing opening <") - } - gtRel := bytes.IndexByte(line[lt+1:], '>') - if gtRel < 0 { - return nil, errors.New("furgit: ident: missing closing >") - } - gt := lt + 1 + gtRel - nameBytes := append([]byte(nil), line[:lt]...) - emailBytes := append([]byte(nil), line[lt+1:gt]...) 
- - rest := line[gt+1:] - if len(rest) == 0 || rest[0] != ' ' { - return nil, errors.New("furgit: ident: missing timestamp separator") - } - rest = rest[1:] - sp := bytes.IndexByte(rest, ' ') - if sp < 0 { - return nil, errors.New("furgit: ident: missing timezone separator") - } - whenStr := string(rest[:sp]) - when, err := strconv.ParseInt(whenStr, 10, 64) - if err != nil { - return nil, fmt.Errorf("furgit: ident: invalid timestamp: %w", err) - } - - tz := rest[sp+1:] - if len(tz) < 5 { - return nil, errors.New("furgit: ident: invalid timezone encoding") - } - sign := 1 - switch tz[0] { - case '-': - sign = -1 - case '+': - default: - return nil, errors.New("furgit: ident: invalid timezone sign") - } - - hh, err := strconv.Atoi(string(tz[1:3])) - if err != nil { - return nil, fmt.Errorf("furgit: ident: invalid timezone hours: %w", err) - } - mm, err := strconv.Atoi(string(tz[3:5])) - if err != nil { - return nil, fmt.Errorf("furgit: ident: invalid timezone minutes: %w", err) - } - if hh < 0 || hh > 23 { - return nil, errors.New("furgit: ident: invalid timezone hours range") - } - if mm < 0 || mm > 59 { - return nil, errors.New("furgit: ident: invalid timezone minutes range") - } - total := int64(hh)*60 + int64(mm) - if total > math.MaxInt32 { - return nil, errors.New("furgit: ident: timezone overflow") - } - offset := int32(total) - if sign < 0 { - offset = -offset - } - - return &Ident{ - Name: nameBytes, - Email: emailBytes, - WhenUnix: when, - OffsetMinutes: offset, - }, nil -} - -// Serialize renders an Ident into canonical Git format. 
-func (ident Ident) Serialize() ([]byte, error) { - var b strings.Builder - b.Grow(len(ident.Name) + len(ident.Email) + 32) - b.Write(ident.Name) - b.WriteString(" <") - b.Write(ident.Email) - b.WriteString("> ") - b.WriteString(strconv.FormatInt(ident.WhenUnix, 10)) - b.WriteByte(' ') - - offset := ident.OffsetMinutes - sign := '+' - if offset < 0 { - sign = '-' - offset = -offset - } - hh := offset / 60 - mm := offset % 60 - fmt.Fprintf(&b, "%c%02d%02d", sign, hh, mm) - return []byte(b.String()), nil -} - -// When returns the ident's time.Time with the correct timezone. -func (ident Ident) When() time.Time { - loc := time.FixedZone("git", int(ident.OffsetMinutes)*60) - return time.Unix(ident.WhenUnix, 0).In(loc) -} diff --git a/ident_test.go b/ident_test.go deleted file mode 100644 index a3d3d03e..00000000 --- a/ident_test.go +++ /dev/null @@ -1,73 +0,0 @@ -package furgit - -import ( - "bytes" - "testing" -) - -func TestIdentSerialize(t *testing.T) { - tests := []struct { - name string - ident Ident - }{ - { - name: "positive offset", - ident: Ident{ - Name: []byte("John Doe"), - Email: []byte("john@example.org"), - WhenUnix: 1234567890, - OffsetMinutes: 120, - }, - }, - { - name: "negative offset", - ident: Ident{ - Name: []byte("Jane Smith"), - Email: []byte("jane@example.org"), - WhenUnix: 9876543210, - OffsetMinutes: -300, - }, - }, - { - name: "zero offset", - ident: Ident{ - Name: []byte("UTC User"), - Email: []byte("utc@example.org"), - WhenUnix: 1000000000, - OffsetMinutes: 0, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - serialized, err := tt.ident.Serialize() - if err != nil { - t.Fatalf("Serialize failed: %v", err) - } - - parsed, err := parseIdent(serialized) - if err != nil { - t.Fatalf("parseIdent failed: %v", err) - } - - if !bytes.HasPrefix(parsed.Name, tt.ident.Name) { - t.Errorf("name: got %q, want prefix %q", parsed.Name, tt.ident.Name) - } - if !bytes.Equal(parsed.Email, tt.ident.Email) { - 
t.Errorf("email: got %q, want %q", parsed.Email, tt.ident.Email) - } - if parsed.WhenUnix != tt.ident.WhenUnix { - t.Errorf("whenUnix: got %d, want %d", parsed.WhenUnix, tt.ident.WhenUnix) - } - if parsed.OffsetMinutes != tt.ident.OffsetMinutes { - t.Errorf("offsetMinutes: got %d, want %d", parsed.OffsetMinutes, tt.ident.OffsetMinutes) - } - - when := tt.ident.When() - if when.Unix() != tt.ident.WhenUnix { - t.Errorf("When().Unix(): got %d, want %d", when.Unix(), tt.ident.WhenUnix) - } - }) - } -} diff --git a/internal/adler32/LICENSE b/internal/adler32/LICENSE deleted file mode 100644 index 5cec357a..00000000 --- a/internal/adler32/LICENSE +++ /dev/null @@ -1,30 +0,0 @@ -Copyright (c) 2024, Michal Hruby -Copyright (c) 2017 The Chromium Authors. All rights reserved. -Copyright (c) 1995-2024 Mark Adler -Copyright (c) 1995-2024 Jean-loup Gailly -Copyright (c) 2022 Adam Stylinski - -BSD 2-Clause License - - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/internal/adler32/LICENSE.ZLIB b/internal/adler32/LICENSE.ZLIB deleted file mode 100644 index c75c1568..00000000 --- a/internal/adler32/LICENSE.ZLIB +++ /dev/null @@ -1,17 +0,0 @@ -Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. - -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. diff --git a/internal/adler32/README b/internal/adler32/README deleted file mode 100644 index b80acd00..00000000 --- a/internal/adler32/README +++ /dev/null @@ -1 +0,0 @@ -This package was mostly copied from github.com/mhr3/adler32-simd. 
diff --git a/internal/adler32/adler32_amd64.go b/internal/adler32/adler32_amd64.go deleted file mode 100644 index 88a854ed..00000000 --- a/internal/adler32/adler32_amd64.go +++ /dev/null @@ -1,93 +0,0 @@ -//go:build amd64 && !purego - -package adler32 - -import ( - "encoding/binary" - "errors" - "hash" - "hash/adler32" - - "golang.org/x/sys/cpu" -) - -// The size of an Adler-32 checksum in bytes. -const Size = 4 - -var ( - hasSSE3 = cpu.X86.HasSSE3 - hasAVX2 = cpu.X86.HasAVX2 -) - -// digest represents the partial evaluation of a checksum. -// The low 16 bits are s1, the high 16 bits are s2. -type digest uint32 - -func (d *digest) Reset() { *d = 1 } - -// New returns a new hash.Hash32 computing the Adler-32 checksum. -func New() hash.Hash32 { - if !hasSSE3 { - return adler32.New() - } - d := new(digest) - d.Reset() - return d -} - -func (d *digest) MarshalBinary() ([]byte, error) { - b := make([]byte, 0, marshaledSize) - b = append(b, magic...) - b = binary.BigEndian.AppendUint32(b, uint32(*d)) - return b, nil -} - -func (d *digest) UnmarshalBinary(b []byte) error { - if len(b) < len(magic) || string(b[:len(magic)]) != magic { - return errors.New("hash/adler32: invalid hash state identifier") - } - if len(b) != marshaledSize { - return errors.New("hash/adler32: invalid hash state size") - } - *d = digest(binary.BigEndian.Uint32(b[len(magic):])) - return nil -} - -func (d *digest) Size() int { return Size } - -func (d *digest) BlockSize() int { return 4 } - -func (d *digest) Write(data []byte) (nn int, err error) { - if len(data) >= 64 { - var h uint32 - if hasAVX2 { - h = adler32_avx2(uint32(*d), data) - } else { - h = adler32_sse3(uint32(*d), data) - } - *d = digest(h) - } else { - h := update(uint32(*d), data) - *d = digest(h) - } - return len(data), nil -} - -func (d *digest) Sum32() uint32 { return uint32(*d) } - -func (d *digest) Sum(in []byte) []byte { - s := uint32(*d) - return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s)) -} - -// Checksum 
returns the Adler-32 checksum of data. -func Checksum(data []byte) uint32 { - if !hasSSE3 || len(data) < 64 { - return update(1, data) - } - - if hasAVX2 { - return adler32_avx2(1, data) - } - return adler32_sse3(1, data) -} diff --git a/internal/adler32/adler32_arm64.go b/internal/adler32/adler32_arm64.go deleted file mode 100644 index ddf9cb5e..00000000 --- a/internal/adler32/adler32_arm64.go +++ /dev/null @@ -1,73 +0,0 @@ -//go:build arm64 && !purego - -package adler32 - -import ( - "encoding/binary" - "errors" - "hash" -) - -// The size of an Adler-32 checksum in bytes. -const Size = 4 - -// digest represents the partial evaluation of a checksum. -// The low 16 bits are s1, the high 16 bits are s2. -type digest uint32 - -func (d *digest) Reset() { *d = 1 } - -// New returns a new hash.Hash32 computing the Adler-32 checksum. -func New() hash.Hash32 { - d := new(digest) - d.Reset() - return d -} - -func (d *digest) MarshalBinary() ([]byte, error) { - b := make([]byte, 0, marshaledSize) - b = append(b, magic...) 
- b = binary.BigEndian.AppendUint32(b, uint32(*d)) - return b, nil -} - -func (d *digest) UnmarshalBinary(b []byte) error { - if len(b) < len(magic) || string(b[:len(magic)]) != magic { - return errors.New("hash/adler32: invalid hash state identifier") - } - if len(b) != marshaledSize { - return errors.New("hash/adler32: invalid hash state size") - } - *d = digest(binary.BigEndian.Uint32(b[len(magic):])) - return nil -} - -func (d *digest) Size() int { return Size } - -func (d *digest) BlockSize() int { return 4 } - -func (d *digest) Write(data []byte) (nn int, err error) { - if len(data) >= 64 { - h := adler32_neon(uint32(*d), data) - *d = digest(h) - } else { - h := update(uint32(*d), data) - *d = digest(h) - } - return len(data), nil -} - -func (d *digest) Sum32() uint32 { return uint32(*d) } - -func (d *digest) Sum(in []byte) []byte { - s := uint32(*d) - return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s)) -} - -// Checksum returns the Adler-32 checksum of data. -func Checksum(data []byte) uint32 { - if len(data) >= 64 { - return adler32_neon(1, data) - } - return update(1, data) -} diff --git a/internal/adler32/adler32_avx2.go b/internal/adler32/adler32_avx2.go deleted file mode 100644 index 042812b8..00000000 --- a/internal/adler32/adler32_avx2.go +++ /dev/null @@ -1,6 +0,0 @@ -//go:build !purego && amd64 - -package adler32 - -//go:noescape -func adler32_avx2(in uint32, buf []byte) uint32 diff --git a/internal/adler32/adler32_avx2.s b/internal/adler32/adler32_avx2.s deleted file mode 100644 index 1b9a1c50..00000000 --- a/internal/adler32/adler32_avx2.s +++ /dev/null @@ -1,263 +0,0 @@ -//go:build !purego && amd64 - -#include "textflag.h" - -DATA weights_1_32<>+0x00(SB)/8, $0x191a1b1c1d1e1f20 -DATA weights_1_32<>+0x08(SB)/8, $0x1112131415161718 -DATA weights_1_32<>+0x10(SB)/8, $0x090a0b0c0d0e0f10 -DATA weights_1_32<>+0x18(SB)/8, $0x0102030405060708 -GLOBL weights_1_32<>(SB), (RODATA|NOPTR), $32 - -DATA ones_u16<>+0x00(SB)/8, $0x0001000100010001 
-DATA ones_u16<>+0x08(SB)/8, $0x0001000100010001 -DATA ones_u16<>+0x10(SB)/8, $0x0001000100010001 -DATA ones_u16<>+0x18(SB)/8, $0x0001000100010001 -GLOBL ones_u16<>(SB), (RODATA|NOPTR), $32 - -DATA one_u16<>+0x00(SB)/2, $0x0001 -GLOBL one_u16<>(SB), (RODATA|NOPTR), $2 - -TEXT ·adler32_avx2(SB), NOSPLIT, $0-36 - MOVLQZX in+0(FP), DI - MOVQ buf_base+8(FP), SI - MOVQ buf_len+16(FP), DX - MOVQ buf_cap+24(FP), CX - WORD $0x8548; BYTE $0xf6 - JE return_one - WORD $0xf889 - WORD $0x8548; BYTE $0xd2 - JE return_result - NOP - NOP - NOP - WORD $0xc189 - WORD $0xe9c1; BYTE $0x10 - WORD $0xb70f; BYTE $0xc0 - CMPQ DX, $0x20 - JB tail16_check - LONG $0x078071bf; BYTE $0x80 - LONG $0xc0eff9c5 - VMOVDQA weights_1_32<>(SB), Y1 - VPBROADCASTW one_u16<>(SB), Y2 - JMP block_loop_setup - -block_accum_init: - LONG $0xf46ffdc5 - LONG $0xedefd1c5 - -block_reduce: - SUBQ AX, DX - LONG $0xf572ddc5; BYTE $0x05 - LONG $0xdbfeddc5 - LONG $0x397de3c4; WORD $0x01f4 - LONG $0xecc6c8c5; BYTE $0x88 - LONG $0xe470f9c5; BYTE $0x88 - LONG $0xe4fed1c5 - LONG $0xec70f9c5; BYTE $0x55 - LONG $0xe4fed1c5 - LONG $0xe07ef9c5 - MOVQ AX, CX - IMULQ DI, CX - SHRQ $0x2f, CX - LONG $0xfff1c969; WORD $0x0000 - WORD $0xc829 - LONG $0x397de3c4; WORD $0x01dc - LONG $0xdbfed9c5 - LONG $0xe370f9c5; BYTE $0xee - LONG $0xdcfee1c5 - LONG $0xe370f9c5; BYTE $0x55 - LONG $0xdbfed9c5 - LONG $0xd97ef9c5 - MOVQ CX, R8 - IMULQ DI, R8 - SHRQ $0x2f, R8 - LONG $0xf1c06945; WORD $0x00ff; BYTE $0x00 - WORD $0x2944; BYTE $0xc1 - CMPQ DX, $0x1f - JBE tail_check - -block_loop_setup: - LONG $0xe06ef9c5 - LONG $0xd96ef9c5 - CMPQ DX, $0x15b0 - LONG $0x15b0b841; WORD $0x0000 - LONG $0xc2420f4c - WORD $0x8944; BYTE $0xc0 - LONG $0x001fe025; BYTE $0x00 - JE block_accum_init - ADDQ $-0x20, R8 - LONG $0xedefd1c5 - LONG $0x20c0f641 - JNE block_loop_entry - LONG $0x2e6ffec5 - ADDQ $0x20, SI - LEAQ -0x20(AX), CX - LONG $0xf0f6d5c5 - LONG $0xf4fecdc5 - LONG $0x0455e2c4; BYTE $0xe9 - LONG $0xeaf5d5c5 - LONG $0xdbfed5c5 - LONG $0xec6ffdc5 - LONG 
$0xe66ffdc5 - CMPQ R8, $0x20 - JAE block_loop_64 - JMP block_reduce - -block_loop_entry: - MOVQ AX, CX - CMPQ R8, $0x20 - JB block_reduce - -block_loop_64: - LONG $0x366ffec5 - LONG $0x7e6ffec5; BYTE $0x20 - LONG $0xc0f64dc5 - LONG $0xc4fe3dc5 - LONG $0xecfed5c5 - LONG $0x044de2c4; BYTE $0xe1 - LONG $0xe2f5ddc5 - LONG $0xdbfeddc5 - ADDQ $0x40, SI - LONG $0xe0f6c5c5 - LONG $0xe4febdc5 - LONG $0xedfebdc5 - LONG $0x0445e2c4; BYTE $0xf1 - LONG $0xf2f5cdc5 - LONG $0xdbfecdc5 - ADDQ $-0x40, CX - JNE block_loop_64 - LONG $0xf46ffdc5 - JMP block_reduce - -return_one: - LONG $0x000001b8; BYTE $0x00 - -return_result: - MOVL AX, ret+32(FP) - RET - -tail_check: - WORD $0x8548; BYTE $0xd2 - JE return_no_tail - -tail16_check: - CMPQ DX, $0x10 - JB tail_bytes_setup - WORD $0xb60f; BYTE $0x3e - WORD $0xf801 - WORD $0xc101 - LONG $0x017eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x0246b60f - WORD $0xf801 - WORD $0xc101 - LONG $0x037eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x0446b60f - WORD $0xf801 - WORD $0xc101 - LONG $0x057eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x0646b60f - WORD $0xf801 - WORD $0xc101 - LONG $0x077eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x0846b60f - WORD $0xf801 - WORD $0xc101 - LONG $0x097eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x0a46b60f - WORD $0xf801 - WORD $0xc101 - LONG $0x0b7eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x0c46b60f - WORD $0xf801 - WORD $0xc101 - LONG $0x0d7eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x46b60f44; BYTE $0x0e - WORD $0x0141; BYTE $0xf8 - WORD $0x0144; BYTE $0xc1 - LONG $0x0f46b60f - WORD $0x0144; BYTE $0xc0 - WORD $0xc101 - ADDQ $-0x10, DX - JE final_reduce - ADDQ $0x10, SI - -tail_bytes_setup: - LEAQ -0x1(DX), DI - MOVQ DX, R9 - ANDQ $0x3, R9 - JE tail_dword_setup - XORL R8, R8 - -tail_byte_loop: - LONG $0x14b60f46; BYTE $0x06 - WORD $0x0144; BYTE $0xd0 - WORD $0xc101 - INCQ R8 - CMPQ R9, R8 - JNE tail_byte_loop - ADDQ R8, SI - SUBQ R8, DX - -tail_dword_setup: - CMPQ DI, $0x3 - JB final_reduce - XORL DI, DI - 
-tail_dword_loop: - LONG $0x04b60f44; BYTE $0x3e - WORD $0x0141; BYTE $0xc0 - WORD $0x0144; BYTE $0xc1 - LONG $0x3e44b60f; BYTE $0x01 - WORD $0x0144; BYTE $0xc0 - WORD $0xc101 - LONG $0x44b60f44; WORD $0x023e - WORD $0x0141; BYTE $0xc0 - WORD $0x0144; BYTE $0xc1 - LONG $0x3e44b60f; BYTE $0x03 - WORD $0x0144; BYTE $0xc0 - WORD $0xc101 - ADDQ $0x4, DI - CMPQ DX, DI - JNE tail_dword_loop - -final_reduce: - LONG $0x000f908d; WORD $0xffff - CMPL AX, $0xfff1 - WORD $0x420f; BYTE $0xd0 - WORD $0xc889 - LONG $0x078071be; BYTE $0x80 - IMULQ AX, SI - SHRQ $0x2f, SI - LONG $0xfff1c669; WORD $0x0000 - WORD $0xc129 - WORD $0xe1c1; BYTE $0x10 - WORD $0xd109 - WORD $0xc889 - NOP - NOP - VZEROUPPER - MOVL AX, ret+32(FP) - RET - -return_no_tail: - WORD $0xe1c1; BYTE $0x10 - WORD $0xc809 - NOP - NOP - VZEROUPPER - MOVL AX, ret+32(FP) - RET diff --git a/internal/adler32/adler32_fallback.go b/internal/adler32/adler32_fallback.go deleted file mode 100644 index c213c3c1..00000000 --- a/internal/adler32/adler32_fallback.go +++ /dev/null @@ -1,19 +0,0 @@ -//go:build (!arm64 && !amd64) || purego - -package adler32 - -import ( - "hash" - "hash/adler32" -) - -// The size of an Adler-32 checksum in bytes. -const Size = 4 - -// New returns a new hash.Hash32 computing the Adler-32 checksum. -func New() hash.Hash32 { - return adler32.New() -} - -// Checksum returns the Adler-32 checksum of data. -func Checksum(data []byte) uint32 { return adler32.Checksum(data) } diff --git a/internal/adler32/adler32_generic.go b/internal/adler32/adler32_generic.go deleted file mode 100644 index f33e0f9b..00000000 --- a/internal/adler32/adler32_generic.go +++ /dev/null @@ -1,45 +0,0 @@ -// Package adler32 implements the Adler-32 checksum. -package adler32 - -const ( - // mod is the largest prime that is less than 65536. - mod = 65521 - // nmax is the largest n such that - // 255 * n * (n+1) / 2 + (n+1) * (mod-1) <= 2^32-1. - // It is mentioned in RFC 1950 (search for "5552"). 
- nmax = 5552 - - // binary representation compatible with standard library. - magic = "adl\x01" - marshaledSize = len(magic) + 4 -) - -// Add p to the running checksum d. -func update(d uint32, p []byte) uint32 { - s1, s2 := d&0xffff, d>>16 - for len(p) > 0 { - var q []byte - if len(p) > nmax { - p, q = p[:nmax], p[nmax:] - } - for len(p) >= 4 { - s1 += uint32(p[0]) - s2 += s1 - s1 += uint32(p[1]) - s2 += s1 - s1 += uint32(p[2]) - s2 += s1 - s1 += uint32(p[3]) - s2 += s1 - p = p[4:] - } - for _, x := range p { - s1 += uint32(x) - s2 += s1 - } - s1 %= mod - s2 %= mod - p = q - } - return s2<<16 | s1 -} diff --git a/internal/adler32/adler32_neon.go b/internal/adler32/adler32_neon.go deleted file mode 100644 index 521b71e0..00000000 --- a/internal/adler32/adler32_neon.go +++ /dev/null @@ -1,6 +0,0 @@ -//go:build !purego && arm64 - -package adler32 - -//go:noescape -func adler32_neon(in uint32, buf []byte) uint32 diff --git a/internal/adler32/adler32_neon.s b/internal/adler32/adler32_neon.s deleted file mode 100644 index 08b170bd..00000000 --- a/internal/adler32/adler32_neon.s +++ /dev/null @@ -1,208 +0,0 @@ -//go:build !purego && arm64 - -#include "textflag.h" - -DATA mult_table<>+0x00(SB)/8, $0x001d001e001f0020 -DATA mult_table<>+0x08(SB)/8, $0x0019001a001b001c -DATA mult_table<>+0x10(SB)/8, $0x0015001600170018 -DATA mult_table<>+0x18(SB)/8, $0x0011001200130014 -DATA mult_table<>+0x20(SB)/8, $0x000d000e000f0010 -DATA mult_table<>+0x28(SB)/8, $0x0009000a000b000c -DATA mult_table<>+0x30(SB)/8, $0x0005000600070008 -DATA mult_table<>+0x38(SB)/8, $0x0001000200030004 -GLOBL mult_table<>(SB), (RODATA|NOPTR), $64 - -TEXT ·adler32_neon(SB), NOSPLIT, $0-36 - MOVW in+0(FP), R0 - MOVD buf_base+8(FP), R1 - MOVD buf_len+16(FP), R2 - MOVD buf_cap+24(FP), R3 - NOP - ANDS $15, R1, R10 - ANDW $65535, R0, R8 - LSRW $16, R0, R9 - NOP - BEQ vector_loop_setup - ADD $1, R1, R11 - MOVD R1, R12 - -align_loop: - WORD $0x3840158d - SUB $1, R2, R2 - TST $15, R11 - ADD $1, R11, R11 - ADDW R13, 
R8, R8 - ADDW R9, R8, R9 - BNE align_loop - MOVW $32881, R11 - MOVW $65521, R13 - MOVKW $(32775<<16), R11 - MOVW $4294901775, R12 - MOVW $65520, R14 - SUB R10, R1, R10 - UMULL R11, R9, R11 - ADDW R12, R8, R12 - CMPW R14, R8 - ADD $16, R10, R1 - LSR $47, R11, R11 - CSELW HI, R12, R8, R8 - MSUBW R13, R9, R11, R9 - -vector_loop_setup: - AND $31, R2, R10 - CMP $32, R2 - BCC tail_entry - MOVD $mult_table<>(SB), R11 - ADD $0, R11, R11 - MOVW $32881, R14 - MOVW $173, R12 - MOVD $137438953440, R13 - MOVKW $(32775<<16), R14 - VLD1 (R11), [V0.H8, V1.H8, V2.H8, V3.H8] - LSR $5, R2, R11 - MOVW $65521, R15 - VEXT $8, V0.B16, V0.B16, V4.B16 - VEXT $8, V1.B16, V1.B16, V5.B16 - VEXT $8, V2.B16, V2.B16, V6.B16 - VEXT $8, V3.B16, V3.B16, V7.B16 - -vector_outer_loop: - CMP $173, R11 - MOVD R1, R2 - CSEL LO, R11, R12, R16 - WORD $0x6f00e414 - MULW R16, R8, R0 - ADD R16<<5, R13, R17 - WORD $0x6f00e410 - AND $137438953440, R17, R17 - WORD $0x6f00e412 - WORD $0x6f00e413 - WORD $0x6f00e415 - VMOV R0, V20.S[3] - MOVW R16, R0 - WORD $0x6f00e411 - -vector_inner_loop: - WORD $0xacc15857 - SUBSW $1, R0, R0 - VADD V17.S4, V20.S4, V20.S4 - WORD $0x2e3712b5 - WORD $0x6e371273 - WORD $0x6e202ad8 - WORD $0x2e361252 - WORD $0x6e361210 - WORD $0x6e206af8 - WORD $0x6e606b11 - BNE vector_inner_loop - VSHL $5, V20.S4, V20.S4 - ADD R17, R1, R17 - SUBS R16, R11, R11 - ADD $32, R17, R1 - WORD $0x2e6082b4 - VEXT $8, V21.B16, V21.B16, V21.B16 - WORD $0x2e6482b4 - VEXT $8, V19.B16, V19.B16, V21.B16 - WORD $0x2e618274 - VEXT $8, V18.B16, V18.B16, V19.B16 - WORD $0x2e6582b4 - WORD $0x2e628254 - WORD $0x2e668274 - WORD $0x2e638214 - VEXT $8, V16.B16, V16.B16, V16.B16 - WORD $0x2e678214 - WORD $0x4eb1be30 - WORD $0x4eb4be91 - WORD $0x0eb1be10 - VMOV V16.S[1], R0 - FMOVS F16, R2 - ADDW R8, R2, R8 - ADDW R9, R0, R9 - UMULL R14, R8, R0 - UMULL R14, R9, R2 - LSR $47, R0, R0 - LSR $47, R2, R2 - MSUBW R15, R8, R0, R8 - MSUBW R15, R9, R2, R9 - BNE vector_outer_loop - -tail_entry: - CBZ R10, return_result - CMP $16, R10 
- BCC tail_byte_loop - WORD $0x3940002b - SUBS $16, R10, R10 - WORD $0x3940042c - WORD $0x3940082d - ADDW R11, R8, R8 - WORD $0x39400c2b - ADDW R9, R8, R9 - ADDW R12, R8, R8 - WORD $0x3940102c - ADDW R8, R9, R9 - ADDW R13, R8, R8 - WORD $0x3940142d - ADDW R8, R9, R9 - ADDW R11, R8, R8 - WORD $0x3940182b - ADDW R8, R9, R9 - ADDW R12, R8, R8 - WORD $0x39401c2c - ADDW R8, R9, R9 - ADDW R13, R8, R8 - ADDW R8, R9, R9 - ADDW R11, R8, R8 - WORD $0x3940202b - ADDW R8, R9, R9 - ADDW R12, R8, R8 - WORD $0x3940242c - ADDW R8, R9, R9 - WORD $0x3940382d - ADDW R11, R8, R8 - WORD $0x3940282b - ADDW R8, R9, R9 - ADDW R12, R8, R8 - WORD $0x39402c2c - ADDW R8, R9, R9 - ADDW R11, R8, R8 - WORD $0x3940302b - ADDW R8, R9, R9 - ADDW R12, R8, R8 - WORD $0x3940342c - ADDW R8, R9, R9 - ADDW R11, R8, R8 - WORD $0x39403c2b - ADDW R8, R9, R9 - ADDW R12, R8, R8 - ADDW R8, R9, R9 - ADDW R13, R8, R8 - ADDW R8, R9, R9 - ADDW R11, R8, R8 - ADDW R8, R9, R9 - BEQ final_reduce - ADD $16, R1, R1 - -tail_byte_loop: - WORD $0x3840142b - SUBS $1, R10, R10 - ADDW R11, R8, R8 - ADDW R9, R8, R9 - BNE tail_byte_loop - -final_reduce: - MOVW $32881, R10 - MOVW $65521, R12 - MOVKW $(32775<<16), R10 - MOVW $4294901775, R11 - MOVW $65520, R13 - ADDW R11, R8, R11 - UMULL R10, R9, R10 - CMPW R13, R8 - CSELW HI, R11, R8, R8 - LSR $47, R10, R10 - MSUBW R12, R9, R10, R9 - -return_result: - ORRW R9<<16, R8, R0 - NOP - MOVW R0, ret+32(FP) - RET diff --git a/internal/adler32/adler32_sse3.go b/internal/adler32/adler32_sse3.go deleted file mode 100644 index 8e8c8a9b..00000000 --- a/internal/adler32/adler32_sse3.go +++ /dev/null @@ -1,6 +0,0 @@ -//go:build !purego && amd64 - -package adler32 - -//go:noescape -func adler32_sse3(in uint32, buf []byte) uint32 diff --git a/internal/adler32/adler32_sse3.s b/internal/adler32/adler32_sse3.s deleted file mode 100644 index 5880bab8..00000000 --- a/internal/adler32/adler32_sse3.s +++ /dev/null @@ -1,214 +0,0 @@ -//go:build !purego && amd64 - -#include "textflag.h" - -DATA 
weights_17_32<>+0x00(SB)/8, $0x191a1b1c1d1e1f20 -DATA weights_17_32<>+0x08(SB)/8, $0x1112131415161718 -GLOBL weights_17_32<>(SB), (RODATA|NOPTR), $16 - -DATA ones_u16<>+0x00(SB)/8, $0x0001000100010001 -DATA ones_u16<>+0x08(SB)/8, $0x0001000100010001 -GLOBL ones_u16<>(SB), (RODATA|NOPTR), $16 - -DATA weights_1_16<>+0x00(SB)/8, $0x090a0b0c0d0e0f10 -DATA weights_1_16<>+0x08(SB)/8, $0x0102030405060708 -GLOBL weights_1_16<>(SB), (RODATA|NOPTR), $16 - -TEXT ·adler32_sse3(SB), NOSPLIT, $0-36 - MOVLQZX in+0(FP), DI - MOVQ buf_base+8(FP), SI - MOVQ buf_len+16(FP), DX - MOVQ buf_cap+24(FP), CX - NOP - NOP - NOP - WORD $0xf889 - LONG $0xc8b70f44 - WORD $0xe8c1; BYTE $0x10 - WORD $0xd189 - WORD $0xe183; BYTE $0x1f - CMPQ DX, $0x20 - JAE block_loop_setup - WORD $0x8944; BYTE $0xcf - JMP tail_entry - -block_loop_setup: - SHRQ $0x5, DX - LONG $0xc0ef0f66 - MOVO weights_17_32<>(SB), X1 - MOVO ones_u16<>(SB), X2 - MOVO weights_1_16<>(SB), X3 - LONG $0x8071b841; WORD $0x8007 - -block_outer_loop: - CMPQ DX, $0xad - LONG $0x00adba41; WORD $0x0000 - LONG $0xd2420f4c - WORD $0x8944; BYTE $0xcf - LONG $0xfaaf0f41 - LONG $0xef6e0f66 - LONG $0xe06e0f66 - WORD $0x8944; BYTE $0xd0 - LONG $0xf6ef0f66 - -block_inner_loop: - LONG $0x3e6f0ff3 - LONG $0x6f0f4466; BYTE $0xc7 - LONG $0x04380f66; BYTE $0xf9 - LONG $0xfaf50f66 - LONG $0xfcfe0f66 - LONG $0x666f0ff3; BYTE $0x10 - LONG $0xeefe0f66 - LONG $0xf60f4466; BYTE $0xc0 - LONG $0xfe0f4466; BYTE $0xc6 - LONG $0xf46f0f66 - LONG $0xf0f60f66 - LONG $0xfe0f4166; BYTE $0xf0 - LONG $0x04380f66; BYTE $0xe3 - LONG $0xe2f50f66 - LONG $0xe7fe0f66 - ADDQ $0x20, SI - WORD $0xc8ff - JNE block_inner_loop - LONG $0xf5720f66; BYTE $0x05 - LONG $0xe5fe0f66 - LONG $0xee700f66; BYTE $0xb1 - LONG $0xeefe0f66 - LONG $0xf5700f66; BYTE $0xee - LONG $0xf5fe0f66 - LONG $0xf77e0f66 - WORD $0x0144; BYTE $0xcf - LONG $0xec700f66; BYTE $0xb1 - LONG $0xecfe0f66 - LONG $0xe5700f66; BYTE $0xee - LONG $0xe5fe0f66 - LONG $0xe07e0f66 - MOVQ DI, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - 
LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00 - WORD $0x2944; BYTE $0xcf - MOVQ AX, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00 - WORD $0x2944; BYTE $0xc8 - WORD $0x8941; BYTE $0xf9 - SUBQ R10, DX - JNE block_outer_loop - -tail_entry: - WORD $0x8548; BYTE $0xc9 - JE return_result - CMPL CX, $0x10 - JB tail_bytes_setup - WORD $0xb60f; BYTE $0x16 - WORD $0xd701 - WORD $0xf801 - LONG $0x0156b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x027eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0356b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x047eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0556b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x067eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0756b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x087eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0956b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x0a7eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0b56b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x0c7eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0d56b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x46b60f44; BYTE $0x0e - WORD $0x0141; BYTE $0xd0 - WORD $0x0144; BYTE $0xc0 - LONG $0x0f7eb60f - WORD $0x0144; BYTE $0xc7 - WORD $0xf801 - ADDQ $-0x10, CX - JE final_reduce - ADDQ $0x10, SI - -tail_bytes_setup: - LEAQ -0x1(CX), DX - MOVQ CX, R9 - ANDQ $0x3, R9 - JE tail_dword_setup - XORL R8, R8 - -tail_byte_loop: - LONG $0x14b60f46; BYTE $0x06 - WORD $0x0144; BYTE $0xd7 - WORD $0xf801 - INCQ R8 - CMPQ R9, R8 - JNE tail_byte_loop - ADDQ R8, SI - SUBQ R8, CX - -tail_dword_setup: - CMPQ DX, $0x3 - JB final_reduce - XORL DX, DX - -tail_dword_loop: - LONG $0x04b60f44; BYTE $0x16 - WORD $0x0141; BYTE $0xf8 - WORD $0x0144; BYTE $0xc0 - LONG $0x167cb60f; BYTE $0x01 - WORD $0x0144; BYTE $0xc7 - WORD $0xf801 - LONG $0x44b60f44; WORD $0x0216 - WORD $0x0141; BYTE $0xf8 - WORD $0x0144; BYTE $0xc0 - LONG $0x167cb60f; BYTE $0x03 - WORD $0x0144; BYTE $0xc7 - WORD $0xf801 - ADDQ $0x4, DX - CMPQ CX, DX - JNE tail_dword_loop - -final_reduce: - 
LONG $0x000f8f8d; WORD $0xffff - CMPL DI, $0xfff1 - WORD $0x420f; BYTE $0xcf - WORD $0xc289 - LONG $0x078071be; BYTE $0x80 - IMULQ DX, SI - SHRQ $0x2f, SI - LONG $0xfff1d669; WORD $0x0000 - WORD $0xd029 - WORD $0xcf89 - -return_result: - WORD $0xe0c1; BYTE $0x10 - WORD $0xf809 - NOP - NOP - MOVL AX, ret+32(FP) - RET diff --git a/internal/adler32/bench_test.go b/internal/adler32/bench_test.go deleted file mode 100644 index 7744b903..00000000 --- a/internal/adler32/bench_test.go +++ /dev/null @@ -1,22 +0,0 @@ -package adler32 - -import ( - "testing" -) - -const benchmarkSize = 64 * 1024 - -var data = make([]byte, benchmarkSize) - -func init() { - for i := range benchmarkSize { - data[i] = byte(i % 256) - } -} - -func BenchmarkChecksum(b *testing.B) { - b.ReportAllocs() - for range b.N { - Checksum(data) - } -} diff --git a/internal/bloom/bloom.go b/internal/bloom/bloom.go deleted file mode 100644 index c1a116d6..00000000 --- a/internal/bloom/bloom.go +++ /dev/null @@ -1,236 +0,0 @@ -// Package bloom provides a bloom filter implementation used for changed-path -// filters in Git commit graphs. -package bloom - -import "encoding/binary" - -const ( - // DataHeaderSize is the size of the BDAT header in commit-graph files. - DataHeaderSize = 3 * 4 - // DefaultMaxChange matches Git's default max-changed-paths behavior. - DefaultMaxChange = 512 -) - -// Settings describe the changed-paths Bloom filter parameters stored in -// commit-graph BDAT chunks. -// -// Obviously, they must match the repository's commit-graph settings to -// interpret filters correctly. -type Settings struct { - HashVersion uint32 - NumHashes uint32 - BitsPerEntry uint32 - MaxChangePaths uint32 -} - -// Filter represents a changed-paths Bloom filter associated with a commit. -// -// The filter encodes which paths changed between a commit and its first -// parent. Paths are expected to be in Git's slash-separated form and -// are queried using a path and its prefixes (e.g. "a/b/c", "a/b", "a"). 
-type Filter struct { - Data []byte - Version uint32 -} - -// ParseSettings reads Bloom filter settings from a BDAT chunk header. -func ParseSettings(bdat []byte) (*Settings, error) { - if len(bdat) < DataHeaderSize { - return nil, ErrInvalid - } - settings := &Settings{ - HashVersion: binary.BigEndian.Uint32(bdat[0:4]), - NumHashes: binary.BigEndian.Uint32(bdat[4:8]), - BitsPerEntry: binary.BigEndian.Uint32(bdat[8:12]), - MaxChangePaths: DefaultMaxChange, - } - return settings, nil -} - -// MightContain reports whether the Bloom filter may contain the given path. -// -// Evaluated against the full path and each of its directory prefixes. A true -// result indicates a possible match; false means the path definitely did not -// change. -func (f *Filter) MightContain(path []byte, settings *Settings) bool { - if f == nil || settings == nil { - return false - } - if len(f.Data) == 0 { - return false - } - keys := keyvec(path, settings) - for i := range keys { - if filterContainsKey(f, &keys[i], settings) { - return true - } - } - return false -} - -type key struct { - hashes []uint32 -} - -func keyvec(path []byte, settings *Settings) []key { - if len(path) == 0 { - return nil - } - count := 1 - for _, b := range path { - if b == '/' { - count++ - } - } - keys := make([]key, 0, count) - keys = append(keys, keyFill(path, settings)) - for i := len(path) - 1; i >= 0; i-- { - if path[i] == '/' { - keys = append(keys, keyFill(path[:i], settings)) - } - } - return keys -} - -func keyFill(path []byte, settings *Settings) key { - const seed0 = 0x293ae76f - const seed1 = 0x7e646e2c - var h0, h1 uint32 - if settings.HashVersion == 2 { - h0 = murmur3SeededV2(seed0, path) - h1 = murmur3SeededV2(seed1, path) - } else { - h0 = murmur3SeededV1(seed0, path) - h1 = murmur3SeededV1(seed1, path) - } - hashes := make([]uint32, settings.NumHashes) - for i := uint32(0); i < settings.NumHashes; i++ { - hashes[i] = h0 + i*h1 - } - return key{hashes: hashes} -} - -func filterContainsKey(filter 
*Filter, key *key, settings *Settings) bool { - if filter == nil || key == nil || settings == nil { - return false - } - if len(filter.Data) == 0 { - return false - } - mod := uint64(len(filter.Data)) * 8 - for _, h := range key.hashes { - idx := uint64(h) % mod - bytePos := idx / 8 - bit := byte(1 << (idx & 7)) - if filter.Data[bytePos]&bit == 0 { - return false - } - } - return true -} - -func murmur3SeededV2(seed uint32, data []byte) uint32 { - const ( - c1 = 0xcc9e2d51 - c2 = 0x1b873593 - r1 = 15 - r2 = 13 - m = 5 - n = 0xe6546b64 - ) - h := seed - nblocks := len(data) / 4 - for i := 0; i < nblocks; i++ { - k := uint32(data[4*i]) | - (uint32(data[4*i+1]) << 8) | - (uint32(data[4*i+2]) << 16) | - (uint32(data[4*i+3]) << 24) - k *= c1 - k = (k << r1) | (k >> (32 - r1)) - k *= c2 - - h ^= k - h = (h << r2) | (h >> (32 - r2)) - h = h*m + n - } - - var k1 uint32 - tail := data[nblocks*4:] - switch len(tail) & 3 { - case 3: - k1 ^= uint32(tail[2]) << 16 - fallthrough - case 2: - k1 ^= uint32(tail[1]) << 8 - fallthrough - case 1: - k1 ^= uint32(tail[0]) - k1 *= c1 - k1 = (k1 << r1) | (k1 >> (32 - r1)) - k1 *= c2 - h ^= k1 - } - - h ^= uint32(len(data)) - h ^= h >> 16 - h *= 0x85ebca6b - h ^= h >> 13 - h *= 0xc2b2ae35 - h ^= h >> 16 - return h -} - -func murmur3SeededV1(seed uint32, data []byte) uint32 { - const ( - c1 = 0xcc9e2d51 - c2 = 0x1b873593 - r1 = 15 - r2 = 13 - m = 5 - n = 0xe6546b64 - ) - h := seed - nblocks := len(data) / 4 - for i := 0; i < nblocks; i++ { - b0 := int8(data[4*i]) - b1 := int8(data[4*i+1]) - b2 := int8(data[4*i+2]) - b3 := int8(data[4*i+3]) - k := uint32(b0) | - (uint32(b1) << 8) | - (uint32(b2) << 16) | - (uint32(b3) << 24) - k *= c1 - k = (k << r1) | (k >> (32 - r1)) - k *= c2 - - h ^= k - h = (h << r2) | (h >> (32 - r2)) - h = h*m + n - } - - var k1 uint32 - tail := data[nblocks*4:] - switch len(tail) & 3 { - case 3: - k1 ^= uint32(int8(tail[2])) << 16 - fallthrough - case 2: - k1 ^= uint32(int8(tail[1])) << 8 - fallthrough - case 1: - k1 
^= uint32(int8(tail[0])) - k1 *= c1 - k1 = (k1 << r1) | (k1 >> (32 - r1)) - k1 *= c2 - h ^= k1 - } - - h ^= uint32(len(data)) - h ^= h >> 16 - h *= 0x85ebca6b - h ^= h >> 13 - h *= 0xc2b2ae35 - h ^= h >> 16 - return h -} diff --git a/internal/bloom/errors.go b/internal/bloom/errors.go deleted file mode 100644 index fe38d1bc..00000000 --- a/internal/bloom/errors.go +++ /dev/null @@ -1,5 +0,0 @@ -package bloom - -import "errors" - -var ErrInvalid = errors.New("bloom: invalid data") diff --git a/internal/bufpool/buffers.go b/internal/bufpool/buffers.go deleted file mode 100644 index 439e7e04..00000000 --- a/internal/bufpool/buffers.go +++ /dev/null @@ -1,189 +0,0 @@ -// Package bufpool provides a lightweight byte-buffer type with optional -// pooling. -package bufpool - -import "sync" - -const ( - // DefaultBufferCap is the minimum capacity a borrowed buffer will have. - // Borrow() will allocate or retrieve a buffer with at least this capacity. - DefaultBufferCap = 32 * 1024 - - // maxPooledBuffer defines the maximum capacity of a buffer that may be - // returned to the pool. Buffers larger than this will not be pooled to - // avoid unbounded memory usage. - maxPooledBuffer = 8 << 20 -) - -// Buffer is a growable byte container that optionally participates in a -// memory pool. A Buffer may be obtained through Borrow() or constructed -// directly from owned data via FromOwned(). -// -// A Buffer's underlying slice may grow as needed. When finished with a -// pooled buffer, the caller should invoke Release() to return it to the pool. -// -// Buffers must not be copied after first use; doing so can cause double-returns -// to the pool and data races. 
-// -//go:nocopy -type Buffer struct { - _ struct{} // for nocopy - buf []byte - pool poolIndex -} - -type poolIndex int8 - -const ( - unpooled poolIndex = -1 -) - -var sizeClasses = [...]int{ - DefaultBufferCap, - 64 << 10, - 128 << 10, - 256 << 10, - 512 << 10, - 1 << 20, - 2 << 20, - 4 << 20, - maxPooledBuffer, -} - -var bufferPools = func() []sync.Pool { - pools := make([]sync.Pool, len(sizeClasses)) - for i, classCap := range sizeClasses { - capCopy := classCap - pools[i].New = func() any { - buf := make([]byte, 0, capCopy) - return &buf - } - } - return pools -}() - -// Borrow retrieves a Buffer suitable for storing up to capHint bytes. -// The returned Buffer may come from an internal sync.Pool. -// -// If capHint is smaller than DefaultBufferCap, it is automatically raised -// to DefaultBufferCap. If no pooled buffer has sufficient capacity, a new -// unpooled buffer is allocated. -// -// The caller must call Release() when finished using the returned Buffer. -func Borrow(capHint int) Buffer { - if capHint < DefaultBufferCap { - capHint = DefaultBufferCap - } - classIdx, classCap, pooled := classFor(capHint) - if !pooled { - newBuf := make([]byte, 0, capHint) - return Buffer{buf: newBuf, pool: unpooled} - } - buf := bufferPools[classIdx].Get().(*[]byte) - if cap(*buf) < classCap { - *buf = make([]byte, 0, classCap) - } - slice := (*buf)[:0] - return Buffer{buf: slice, pool: poolIndex(classIdx)} -} - -// FromOwned constructs a Buffer from a caller-owned byte slice. The resulting -// Buffer does not participate in pooling and will never be returned to the -// internal pool when released. -func FromOwned(buf []byte) Buffer { - return Buffer{buf: buf, pool: unpooled} -} - -// Resize adjusts the length of the buffer to n bytes. If n exceeds the current -// capacity, the underlying storage is grown. If n is negative, it is treated -// as zero. -// -// The buffer's new contents beyond the previous length are undefined. 
-func (buf *Buffer) Resize(n int) { - if n < 0 { - n = 0 - } - buf.ensureCapacity(n) - buf.buf = buf.buf[:n] -} - -// Append copies the provided bytes onto the end of the buffer, growing its -// capacity if required. If src is empty, the method does nothing. -// -// The receiver retains ownership of the data; the caller may reuse src freely. -func (buf *Buffer) Append(src []byte) { - if len(src) == 0 { - return - } - start := len(buf.buf) - buf.ensureCapacity(start + len(src)) - buf.buf = buf.buf[:start+len(src)] - copy(buf.buf[start:], src) -} - -// Bytes returns the underlying byte slice that represents the current contents -// of the buffer. Modifying the returned slice modifies the Buffer itself. -func (buf *Buffer) Bytes() []byte { - return buf.buf -} - -// Release returns the buffer to the global pool if it originated from the -// pool and its capacity is no larger than maxPooledBuffer. After release, the -// Buffer becomes invalid and should not be used further. -// -// Releasing a non-pooled buffer has no effect beyond clearing its internal -// storage. -func (buf *Buffer) Release() { - if buf.buf == nil { - return - } - buf.returnToPool() - buf.buf = nil - buf.pool = unpooled -} - -// ensureCapacity grows the underlying buffer to accommodate the requested -// number of bytes. Growth doubles the capacity by default unless a larger -// expansion is needed. If the previous storage was pooled and not oversized, -// it is returned to the pool. 
-func (buf *Buffer) ensureCapacity(needed int) { - if cap(buf.buf) >= needed { - return - } - classIdx, classCap, pooled := classFor(needed) - var newBuf []byte - if pooled { - raw := bufferPools[classIdx].Get().(*[]byte) - if cap(*raw) < classCap { - *raw = make([]byte, 0, classCap) - } - newBuf = (*raw)[:len(buf.buf)] - } else { - newBuf = make([]byte, len(buf.buf), classCap) - } - copy(newBuf, buf.buf) - buf.returnToPool() - buf.buf = newBuf - if pooled { - buf.pool = poolIndex(classIdx) - } else { - buf.pool = unpooled - } -} - -func classFor(size int) (idx int, classCap int, ok bool) { - for i, class := range sizeClasses { - if size <= class { - return i, class, true - } - } - return -1, size, false -} - -func (buf *Buffer) returnToPool() { - if buf.pool == unpooled { - return - } - tmp := buf.buf[:0] - bufferPools[int(buf.pool)].Put(&tmp) -} diff --git a/internal/bufpool/buffers_test.go b/internal/bufpool/buffers_test.go deleted file mode 100644 index f5c006da..00000000 --- a/internal/bufpool/buffers_test.go +++ /dev/null @@ -1,77 +0,0 @@ -package bufpool - -import "testing" - -func TestBorrowBufferResizeAndAppend(t *testing.T) { - b := Borrow(1) - defer b.Release() - - if cap(b.buf) < DefaultBufferCap { - t.Fatalf("expected capacity >= %d, got %d", DefaultBufferCap, cap(b.buf)) - } - - b.Append([]byte("alpha")) - b.Append([]byte("beta")) - if got := string(b.Bytes()); got != "alphabeta" { - t.Fatalf("unexpected contents: %q", got) - } - - b.Resize(3) - if got := string(b.Bytes()); got != "alp" { - t.Fatalf("resize shrink mismatch: %q", got) - } - - b.Resize(8) - if len(b.Bytes()) != 8 { - t.Fatalf("expected len 8 after grow, got %d", len(b.Bytes())) - } - if prefix := string(b.Bytes()[:3]); prefix != "alp" { - t.Fatalf("prefix lost after grow: %q", prefix) - } -} - -func TestBorrowBufferRelease(t *testing.T) { - b := Borrow(DefaultBufferCap / 2) - b.Append([]byte("data")) - b.Release() - if b.buf != nil { - t.Fatal("expected buffer cleared after release") - 
} -} - -func TestBorrowUsesLargerPools(t *testing.T) { - const request = DefaultBufferCap * 4 - - classIdx, classCap, pooled := classFor(request) - if !pooled { - t.Fatalf("expected %d to map to a pooled class", request) - } - - b := Borrow(request) - if b.pool != poolIndex(classIdx) { - t.Fatalf("expected pooled buffer in class %d, got %d", classIdx, b.pool) - } - if cap(b.buf) != classCap { - t.Fatalf("expected capacity %d, got %d", classCap, cap(b.buf)) - } - b.Release() - - b2 := Borrow(request) - defer b2.Release() - if b2.pool != poolIndex(classIdx) { - t.Fatalf("expected pooled buffer in class %d on reuse, got %d", classIdx, b2.pool) - } - if cap(b2.buf) != classCap { - t.Fatalf("expected capacity %d on reuse, got %d", classCap, cap(b2.buf)) - } -} - -func TestGrowingBufferStaysPooled(t *testing.T) { - b := Borrow(DefaultBufferCap) - defer b.Release() - - b.Append(make([]byte, DefaultBufferCap*3)) - if b.pool == unpooled { - t.Fatal("buffer should stay pooled after growth within limit") - } -} diff --git a/internal/flatex/LICENSE b/internal/flatex/LICENSE deleted file mode 100644 index 2a7cf70d..00000000 --- a/internal/flatex/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright 2009 The Go Authors. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google LLC nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/internal/flatex/decompress.go b/internal/flatex/decompress.go deleted file mode 100644 index 065e23f3..00000000 --- a/internal/flatex/decompress.go +++ /dev/null @@ -1,38 +0,0 @@ -package flatex - -import ( - "io" - - "codeberg.org/lindenii/furgit/internal/bufpool" -) - -func DecompressSized(src []byte, sizeHint int) (bufpool.Buffer, int, error) { - d := sliceInflaterPool.Get().(*sliceInflater) - defer sliceInflaterPool.Put(d) - - if err := d.reset(src); err != nil { - return bufpool.Buffer{}, 0, err - } - - out := bufpool.Borrow(sizeHint) - out.Resize(0) - - for { - if len(d.toRead) > 0 { - out.Append(d.toRead) - d.toRead = nil - continue - } - if d.err != nil { - if d.err == io.EOF { - return out, d.pos, nil - } - out.Release() - return bufpool.Buffer{}, 0, d.err - } - d.step(d) - if d.err != nil && len(d.toRead) == 0 { - d.toRead = d.window.readFlush() - } - } -} diff --git a/internal/flatex/decompress_test.go b/internal/flatex/decompress_test.go deleted file mode 100644 index e53a6581..00000000 --- a/internal/flatex/decompress_test.go +++ /dev/null @@ -1,57 +0,0 @@ -package flatex - -import ( - "bytes" - stdflate "compress/flate" - "testing" -) - -func compressDeflate(t *testing.T, payload 
[]byte) []byte { - t.Helper() - var buf bytes.Buffer - w, err := stdflate.NewWriter(&buf, stdflate.DefaultCompression) - if err != nil { - t.Fatalf("NewWriter: %v", err) - } - if _, err := w.Write(payload); err != nil { - t.Fatalf("Write: %v", err) - } - if err := w.Close(); err != nil { - t.Fatalf("Close: %v", err) - } - return buf.Bytes() -} - -func TestDecompressSized(t *testing.T) { - payload := bytes.Repeat([]byte("golang"), 32) - compressed := compressDeflate(t, payload) - - out, _, err := DecompressSized(compressed, 0) - if err != nil { - t.Fatalf("DecompressSized: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), payload) { - t.Fatalf("unexpected payload: got %q", out.Bytes()) - } -} - -func TestDecompressSizedUsesHint(t *testing.T) { - payload := []byte("short") - compressed := compressDeflate(t, payload) - - const hint = 1 << 19 - out, _, err := DecompressSized(compressed, hint) - if err != nil { - t.Fatalf("DecompressSized: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), payload) { - t.Fatalf("unexpected payload: got %q", out.Bytes()) - } - if cap(out.Bytes()) < hint { - t.Fatalf("expected capacity >= %d, got %d", hint, cap(out.Bytes())) - } -} diff --git a/internal/flatex/huffman.go b/internal/flatex/huffman.go deleted file mode 100644 index 32172dbb..00000000 --- a/internal/flatex/huffman.go +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package flatex implements the DEFLATE compressed data format, described in -// RFC 1951. The [compress/gzip] and [compress/zlib] packages implement access -// to DEFLATE-based file formats. -package flatex - -import ( - "math/bits" - "strconv" - "sync" -) - -const ( - // The special code used to mark the end of a block. 
- endBlockMarker = 256 - maxCodeLen = 16 // max length of Huffman code - maxMatchOffset = 1 << 15 // The largest match offset - // The next three numbers come from the RFC section 3.2.7, with the - // additional proviso in section 3.2.5 which implies that distance codes - // 30 and 31 should never occur in compressed data. - maxNumLit = 286 - maxNumDist = 30 - numCodes = 19 // number of codes in Huffman meta-code -) - -// A CorruptInputError reports the presence of corrupt input at a given offset. -type CorruptInputError int64 - -func (e CorruptInputError) Error() string { - return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) -} - -// The data structure for decoding Huffman tables is based on that of -// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), -// For codes smaller than the table width, there are multiple entries -// (each combination of trailing bits has the same value). For codes -// larger than the table width, the table contains a link to an overflow -// table. The width of each entry in the link table is the maximum code -// size minus the chunk width. -// -// Note that you can do a lookup in the table even without all bits -// filled. Since the extra bits are zero, and the DEFLATE Huffman codes -// have the property that shorter codes come before longer ones, the -// bit length estimate in the result is a lower bound on the actual -// number of bits. 
-// -// See the following: -// https://github.com/madler/zlib/raw/master/doc/algorithm.txt - -// chunk & 15 is number of bits -// chunk >> 4 is value, including table link - -const ( - huffmanChunkBits = 9 - huffmanNumChunks = 1 << huffmanChunkBits - huffmanCountMask = 15 - huffmanValueShift = 4 -) - -type huffmanDecoder struct { - min int // the minimum code length - chunks [huffmanNumChunks]uint32 // chunks as described above - links [][]uint32 // overflow links - linkMask uint32 // mask the width of the link table -} - -// Initialize Huffman decoding tables from array of code lengths. -// Following this function, h is guaranteed to be initialized into a complete -// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a -// degenerate case where the tree has only a single symbol with length 1. Empty -// trees are permitted. -func (h *huffmanDecoder) init(lengths []int) bool { - // Sanity enables additional runtime tests during Huffman - // table construction. It's intended to be used during - // development to supplement the currently ad-hoc unit tests. - const sanity = false - - if h.min != 0 { - *h = huffmanDecoder{} - } - - // Count number of codes of each length, - // compute min and max length. - var count [maxCodeLen]int - var min, max int - for _, n := range lengths { - if n == 0 { - continue - } - if min == 0 || n < min { - min = n - } - if n > max { - max = n - } - count[n]++ - } - - // Empty tree. The decompressor.huffSym function will fail later if the tree - // is used. Technically, an empty tree is only valid for the HDIST tree and - // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree - // is guaranteed to fail since it will attempt to use the tree to decode the - // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is - // guaranteed to fail later since the compressed data section must be - // composed of at least one symbol (the end-of-block marker). 
- if max == 0 { - return true - } - - code := 0 - var nextcode [maxCodeLen]int - for i := min; i <= max; i++ { - code <<= 1 - nextcode[i] = code - code += count[i] - } - - // Check that the coding is complete (i.e., that we've - // assigned all 2-to-the-max possible bit sequences). - // Exception: To be compatible with zlib, we also need to - // accept degenerate single-code codings. See also - // TestDegenerateHuffmanCoding. - if code != 1<<uint(max) && (code != 1 || max != 1) { - return false - } - - h.min = min - if max > huffmanChunkBits { - numLinks := 1 << (uint(max) - huffmanChunkBits) - h.linkMask = uint32(numLinks - 1) - - // create link tables - link := nextcode[huffmanChunkBits+1] >> 1 - h.links = make([][]uint32, huffmanNumChunks-link) - for j := uint(link); j < huffmanNumChunks; j++ { - reverse := int(bits.Reverse16(uint16(j))) - reverse >>= uint(16 - huffmanChunkBits) - off := j - uint(link) - if sanity && h.chunks[reverse] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1)) - h.links[off] = make([]uint32, numLinks) - } - } - - for i, n := range lengths { - if n == 0 { - continue - } - code := nextcode[n] - nextcode[n]++ - chunk := uint32(i<<huffmanValueShift | n) - reverse := int(bits.Reverse16(uint16(code))) - reverse >>= uint(16 - n) - if n <= huffmanChunkBits { - for off := reverse; off < len(h.chunks); off += 1 << uint(n) { - // We should never need to overwrite - // an existing chunk. Also, 0 is - // never a valid chunk, because the - // lower 4 "count" bits should be - // between 1 and 15. - if sanity && h.chunks[off] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[off] = chunk - } - } else { - j := reverse & (huffmanNumChunks - 1) - if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { - // Longer codes should have been - // associated with a link table above. 
- panic("impossible: not an indirect chunk") - } - value := h.chunks[j] >> huffmanValueShift - linktab := h.links[value] - reverse >>= huffmanChunkBits - for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { - if sanity && linktab[off] != 0 { - panic("impossible: overwriting existing chunk") - } - linktab[off] = chunk - } - } - } - - if sanity { - // Above we've sanity checked that we never overwrote - // an existing entry. Here we additionally check that - // we filled the tables completely. - for i, chunk := range h.chunks { - if chunk == 0 { - // As an exception, in the degenerate - // single-code case, we allow odd - // chunks to be missing. - if code == 1 && i%2 == 1 { - continue - } - panic("impossible: missing chunk") - } - } - for _, linktab := range h.links { - for _, chunk := range linktab { - if chunk == 0 { - panic("impossible: missing chunk") - } - } - } - } - - return true -} - -// RFC 1951 section 3.2.7. -// Compression with dynamic Huffman codes -var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} - -var ( - // Initialize the fixedHuffmanDecoder only once upon first use. - fixedOnce sync.Once - fixedHuffmanDecoder huffmanDecoder -) - -func fixedHuffmanDecoderInit() { - fixedOnce.Do(func() { - // These come from the RFC section 3.2.6. - var bits [288]int - for i := 0; i < 144; i++ { - bits[i] = 8 - } - for i := 144; i < 256; i++ { - bits[i] = 9 - } - for i := 256; i < 280; i++ { - bits[i] = 7 - } - for i := 280; i < 288; i++ { - bits[i] = 8 - } - fixedHuffmanDecoder.init(bits[:]) - }) -} diff --git a/internal/flatex/slice_inflate.go b/internal/flatex/slice_inflate.go deleted file mode 100644 index f9120143..00000000 --- a/internal/flatex/slice_inflate.go +++ /dev/null @@ -1,472 +0,0 @@ -package flatex - -import ( - "io" - "math/bits" - "sync" -) - -// sliceInflater is a specialized DEFLATE decoder that reads directly from an -// in-memory byte slice. 
It mirrors the main decompressor but avoids the -// overhead of the Reader interfaces, enabling faster byte-slice decoding. -type sliceInflater struct { - input []byte - pos int - roffset int64 - - b uint32 - nb uint - - h1, h2 huffmanDecoder - - bits *[maxNumLit + maxNumDist]int - codebits *[numCodes]int - - window windowDecoder - - toRead []byte - step func(*sliceInflater) - stepState int - final bool - err error - hl, hd *huffmanDecoder - copyLen int - copyDist int -} - -var sliceInflaterPool = sync.Pool{ - New: func() any { - fixedHuffmanDecoderInit() - return &sliceInflater{ - bits: new([maxNumLit + maxNumDist]int), - codebits: new([numCodes]int), - } - }, -} - -func (f *sliceInflater) reset(src []byte) error { - bits := f.bits - codebits := f.codebits - windowState := f.window - *f = sliceInflater{ - input: src, - bits: bits, - codebits: codebits, - window: windowState, - step: (*sliceInflater).nextBlock, - } - f.window.init(maxMatchOffset) - return nil -} - -func (f *sliceInflater) nextBlock() { - for f.nb < 1+2 { - if err := f.moreBits(); err != nil { - f.err = err - return - } - } - f.final = f.b&1 == 1 - f.b >>= 1 - typ := f.b & 3 - f.b >>= 2 - f.nb -= 1 + 2 - switch typ { - case 0: - f.dataBlock() - case 1: - f.hl = &fixedHuffmanDecoder - f.hd = nil - f.huffmanBlock() - case 2: - if err := f.readHuffman(); err != nil { - f.err = err - return - } - f.hl = &f.h1 - f.hd = &f.h2 - f.huffmanBlock() - default: - f.err = CorruptInputError(f.roffset) - } -} - -func (f *sliceInflater) huffmanBlock() { - const ( - stateInit = iota - stateDict - ) - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - { - v, err := f.huffSym(f.hl) - if err != nil { - f.err = err - return - } - var n uint - var length int - switch { - case v < 256: - f.window.writeByte(byte(v)) - if f.window.availWrite() == 0 { - f.toRead = f.window.readFlush() - f.step = (*sliceInflater).huffmanBlock - f.stepState = stateInit - return 
- } - goto readLiteral - case v == 256: - f.finishBlock() - return - case v < 265: - length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 - case v < maxNumLit: - length = 258 - n = 0 - default: - f.err = CorruptInputError(f.roffset) - return - } - if n > 0 { - for f.nb < n { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - length += int(f.b & uint32(1<<n-1)) - f.b >>= n - f.nb -= n - } - - var dist int - if f.hd == nil { - for f.nb < 5 { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 - } else { - if dist, err = f.huffSym(f.hd); err != nil { - f.err = err - return - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - extra := (dist & 1) << nb - for f.nb < nb { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - extra |= int(f.b & uint32(1<<nb-1)) - f.b >>= nb - f.nb -= nb - dist = 1<<(nb+1) + 1 + extra - default: - f.err = CorruptInputError(f.roffset) - return - } - - if dist > f.window.histSize() { - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, dist - goto copyHistory - } - -copyHistory: - { - cnt := f.window.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = f.window.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if f.window.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.window.readFlush() - f.step = (*sliceInflater).huffmanBlock - f.stepState = stateDict - return - } - goto readLiteral - } -} - -func (f *sliceInflater) dataBlock() { - f.nb = 0 - f.b = 0 - - if f.pos+4 > len(f.input) { - f.pos = len(f.input) - f.err = io.ErrUnexpectedEOF - return - } - hdr := 
f.input[f.pos : f.pos+4] - f.pos += 4 - f.roffset += 4 - n := int(hdr[0]) | int(hdr[1])<<8 - nn := int(hdr[2]) | int(hdr[3])<<8 - if uint16(nn) != uint16(^n) { - f.err = CorruptInputError(f.roffset) - return - } - - if n == 0 { - f.toRead = f.window.readFlush() - f.finishBlock() - return - } - - f.copyLen = n - f.copyData() -} - -func (f *sliceInflater) copyData() { - for { - if f.copyLen == 0 { - f.finishBlock() - return - } - buf := f.window.writeSlice() - if len(buf) == 0 { - f.toRead = f.window.readFlush() - f.step = (*sliceInflater).copyData - return - } - n := f.copyLen - if n > len(buf) { - n = len(buf) - } - if f.pos+n > len(f.input) { - f.err = io.ErrUnexpectedEOF - return - } - copy(buf[:n], f.input[f.pos:f.pos+n]) - f.pos += n - f.roffset += int64(n) - f.copyLen -= n - f.window.writeMark(n) - if f.window.availWrite() == 0 { - f.toRead = f.window.readFlush() - f.step = (*sliceInflater).copyData - return - } - } -} - -func (f *sliceInflater) finishBlock() { - if f.final { - if f.window.availRead() > 0 { - f.toRead = f.window.readFlush() - } - f.err = io.EOF - } - f.step = (*sliceInflater).nextBlock - f.stepState = 0 -} - -func (f *sliceInflater) moreBits() error { - if f.pos >= len(f.input) { - return io.ErrUnexpectedEOF - } - c := f.input[f.pos] - f.pos++ - f.roffset++ - f.b |= uint32(c) << (f.nb & 31) - f.nb += 8 - return nil -} - -func (f *sliceInflater) huffSym(h *huffmanDecoder) (int, error) { - n := uint(h.min) - nb, b := f.nb, f.b - for { - for nb < n { - if f.pos >= len(f.input) { - f.b = b - f.nb = nb - return 0, io.ErrUnexpectedEOF - } - c := f.input[f.pos] - f.pos++ - f.roffset++ - b |= uint32(c) << (nb & 31) - nb += 8 - } - chunk := h.chunks[b&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= nb { - if n == 0 { - f.b = b - f.nb = nb - f.err = CorruptInputError(f.roffset) - return 
0, f.err - } - f.b = b >> (n & 31) - f.nb = nb - n - return int(chunk >> huffmanValueShift), nil - } - } -} - -func (f *sliceInflater) readHuffman() error { - for f.nb < 5+5+4 { - if err := f.moreBits(); err != nil { - return err - } - } - nlit := int(f.b&0x1F) + 257 - if nlit > maxNumLit { - return CorruptInputError(f.roffset) - } - f.b >>= 5 - ndist := int(f.b&0x1F) + 1 - if ndist > maxNumDist { - return CorruptInputError(f.roffset) - } - f.b >>= 5 - nclen := int(f.b&0xF) + 4 - f.b >>= 4 - f.nb -= 5 + 5 + 4 - codebits := f.codebits[:] - bits := f.bits[:] - clear(codebits) - clear(bits) - for i := 0; i < nclen; i++ { - for f.nb < 3 { - if err := f.moreBits(); err != nil { - return err - } - } - codebits[codeOrder[i]] = int(f.b & 0x7) - f.b >>= 3 - f.nb -= 3 - } - if !f.h1.init(codebits) { - return CorruptInputError(f.roffset) - } - for i := range bits { - bits[i] = 0 - } - i := 0 - for i < nlit+ndist { - x, err := f.huffSym(&f.h1) - if err != nil { - return err - } - switch { - case x < 16: - bits[i] = x - i++ - case x == 16: - if i == 0 { - return CorruptInputError(f.roffset) - } - repeat := 3 - for f.nb < 2 { - if err := f.moreBits(); err != nil { - return err - } - } - repeat += int(f.b & 0x3) - f.b >>= 2 - f.nb -= 2 - for repeat > 0 { - if i >= len(bits) { - return CorruptInputError(f.roffset) - } - bits[i] = bits[i-1] - i++ - repeat-- - } - case x == 17: - repeat := 3 - for f.nb < 3 { - if err := f.moreBits(); err != nil { - return err - } - } - repeat += int(f.b & 0x7) - f.b >>= 3 - f.nb -= 3 - for repeat > 0 { - if i >= len(bits) { - return CorruptInputError(f.roffset) - } - bits[i] = 0 - i++ - repeat-- - } - case x == 18: - repeat := 11 - for f.nb < 7 { - if err := f.moreBits(); err != nil { - return err - } - } - repeat += int(f.b & 0x7F) - f.b >>= 7 - f.nb -= 7 - for repeat > 0 { - if i >= len(bits) { - return CorruptInputError(f.roffset) - } - bits[i] = 0 - i++ - repeat-- - } - default: - return CorruptInputError(f.roffset) - } - } - if 
!f.h1.init(bits[:nlit]) { - return CorruptInputError(f.roffset) - } - if !f.h2.init(bits[nlit : nlit+ndist]) { - return CorruptInputError(f.roffset) - } - if f.h1.min < bits[endBlockMarker] { - f.h1.min = bits[endBlockMarker] - } - return nil -} diff --git a/internal/flatex/window_decoder.go b/internal/flatex/window_decoder.go deleted file mode 100644 index 492c6a96..00000000 --- a/internal/flatex/window_decoder.go +++ /dev/null @@ -1,101 +0,0 @@ -package flatex - -// windowDecoder implements the sliding window used in decompression. -type windowDecoder struct { - hist []byte - - wrPos int - rdPos int - full bool -} - -func (wd *windowDecoder) init(size int) { - *wd = windowDecoder{hist: wd.hist} - - if cap(wd.hist) < size { - wd.hist = make([]byte, size) - } - wd.hist = wd.hist[:size] - - wd.wrPos = 0 - wd.rdPos = 0 - wd.full = false -} - -func (wd *windowDecoder) histSize() int { - if wd.full { - return len(wd.hist) - } - return wd.wrPos -} - -func (wd *windowDecoder) availRead() int { - return wd.wrPos - wd.rdPos -} - -func (wd *windowDecoder) availWrite() int { - return len(wd.hist) - wd.wrPos -} - -func (wd *windowDecoder) writeSlice() []byte { - return wd.hist[wd.wrPos:] -} - -func (wd *windowDecoder) writeMark(cnt int) { - wd.wrPos += cnt -} - -func (wd *windowDecoder) writeByte(c byte) { - wd.hist[wd.wrPos] = c - wd.wrPos++ -} - -func (wd *windowDecoder) writeCopy(dist, length int) int { - dstBase := wd.wrPos - dstPos := dstBase - srcPos := dstPos - dist - endPos := dstPos + length - if endPos > len(wd.hist) { - endPos = len(wd.hist) - } - - if srcPos < 0 { - srcPos += len(wd.hist) - dstPos += copy(wd.hist[dstPos:endPos], wd.hist[srcPos:]) - srcPos = 0 - } - - for dstPos < endPos { - dstPos += copy(wd.hist[dstPos:endPos], wd.hist[srcPos:dstPos]) - } - - wd.wrPos = dstPos - return dstPos - dstBase -} - -func (wd *windowDecoder) tryWriteCopy(dist, length int) int { - dstPos := wd.wrPos - endPos := dstPos + length - if dstPos < dist || endPos > len(wd.hist) { 
- return 0 - } - dstBase := dstPos - srcPos := dstPos - dist - - for dstPos < endPos { - dstPos += copy(wd.hist[dstPos:endPos], wd.hist[srcPos:dstPos]) - } - - wd.wrPos = dstPos - return dstPos - dstBase -} - -func (wd *windowDecoder) readFlush() []byte { - toRead := wd.hist[wd.rdPos:wd.wrPos] - wd.rdPos = wd.wrPos - if wd.wrPos == len(wd.hist) { - wd.wrPos, wd.rdPos = 0, 0 - wd.full = true - } - return toRead -} diff --git a/internal/zlib/LICENSE b/internal/zlib/LICENSE deleted file mode 100644 index 2a7cf70d..00000000 --- a/internal/zlib/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright 2009 The Go Authors. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google LLC nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/internal/zlib/reader.go b/internal/zlib/reader.go deleted file mode 100644 index 2234e7e0..00000000 --- a/internal/zlib/reader.go +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -/* -Package zlib implements reading and writing of zlib format compressed data, -as specified in RFC 1950. - -This package differs from the standard library's compress/zlib package -in that it pools readers and writers to reduce allocations. - -Note that closing a reader or writer causes it to be returned to a pool -for reuse. Therefore, the caller must not retain references to a -reader or writer after closing it; in the standard library's -compress/zlib package, it is legal to Reset a closed reader or writer -and continue using it; that is not allowed here, so there is simply no -Resetter interface. - -The implementation provides filters that uncompress during reading -and compress during writing. 
For example, to write compressed data -to a buffer: - - var b bytes.Buffer - w := zlib.NewWriter(&b) - w.Write([]byte("hello, world\n")) - w.Close() - -and to read that data back: - - r, err := zlib.NewReader(&b) - io.Copy(os.Stdout, r) - r.Close() -*/ -package zlib - -import ( - "bufio" - "compress/flate" - "encoding/binary" - "errors" - "hash" - "io" - "sync" - - "codeberg.org/lindenii/furgit/internal/adler32" -) - -const ( - zlibDeflate = 8 - zlibMaxWindow = 7 -) - -var ( - // ErrChecksum is returned when reading ZLIB data that has an invalid checksum. - ErrChecksum = errors.New("zlib: invalid checksum") - // ErrDictionary is returned when reading ZLIB data that has an invalid dictionary. - ErrDictionary = errors.New("zlib: invalid dictionary") - // ErrHeader is returned when reading ZLIB data that has an invalid header. - ErrHeader = errors.New("zlib: invalid header") -) - -var readerPool = sync.Pool{ - New: func() any { - r := new(reader) - return r - }, -} - -type reader struct { - r flate.Reader - decompressor io.ReadCloser - digest hash.Hash32 - err error - scratch [4]byte -} - -// NewReader creates a new ReadCloser. -// Reads from the returned ReadCloser read and decompress data from r. -// If r does not implement [io.ByteReader], the decompressor may read more -// data than necessary from r. -// It is the caller's responsibility to call Close on the ReadCloser when done. -func NewReader(r io.Reader) (io.ReadCloser, error) { - return NewReaderDict(r, nil) -} - -// NewReaderDict is like [NewReader] but uses a preset dictionary. -// NewReaderDict ignores the dictionary if the compressed data does not refer to it. -// If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary]. 
-func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) { - v := readerPool.Get() - z, ok := v.(*reader) - if !ok { - panic("zlib: pool returned unexpected type") - } - err := z.Reset(r, dict) - if err != nil { - return nil, err - } - return z, nil -} - -func (z *reader) Read(p []byte) (int, error) { - if z.err != nil { - return 0, z.err - } - - var n int - n, z.err = z.decompressor.Read(p) - z.digest.Write(p[0:n]) - if z.err != io.EOF { - // In the normal case we return here. - return n, z.err - } - - // Finished file; check checksum. - if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } - z.err = err - return n, z.err - } - // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). - checksum := binary.BigEndian.Uint32(z.scratch[:4]) - if checksum != z.digest.Sum32() { - z.err = ErrChecksum - return n, z.err - } - return n, io.EOF -} - -// Calling Close does not close the wrapped [io.Reader] originally passed to [NewReader]. -// In order for the ZLIB checksum to be verified, the reader must be -// fully consumed until the [io.EOF]. -func (z *reader) Close() error { - if z.err != nil && z.err != io.EOF { - return z.err - } - z.err = z.decompressor.Close() - if z.err != nil { - return z.err - } - - readerPool.Put(z) - return nil -} - -func (z *reader) Reset(r io.Reader, dict []byte) error { - *z = reader{decompressor: z.decompressor} - if fr, ok := r.(flate.Reader); ok { - z.r = fr - } else { - z.r = bufio.NewReader(r) - } - - // Read the header (RFC 1950 section 2.2.). 
- _, z.err = io.ReadFull(z.r, z.scratch[0:2]) - if z.err != nil { - if z.err == io.EOF { - z.err = io.ErrUnexpectedEOF - } - return z.err - } - h := binary.BigEndian.Uint16(z.scratch[:2]) - if (z.scratch[0]&0x0f != zlibDeflate) || (z.scratch[0]>>4 > zlibMaxWindow) || (h%31 != 0) { - z.err = ErrHeader - return z.err - } - haveDict := z.scratch[1]&0x20 != 0 - if haveDict { - _, z.err = io.ReadFull(z.r, z.scratch[0:4]) - if z.err != nil { - if z.err == io.EOF { - z.err = io.ErrUnexpectedEOF - } - return z.err - } - checksum := binary.BigEndian.Uint32(z.scratch[:4]) - if checksum != adler32.Checksum(dict) { - z.err = ErrDictionary - return z.err - } - } - - if z.decompressor == nil { - if haveDict { - z.decompressor = flate.NewReaderDict(z.r, dict) - } else { - z.decompressor = flate.NewReader(z.r) - } - } else { - z.err = z.decompressor.(flate.Resetter).Reset(z.r, dict) - if z.err != nil { - return z.err - } - } - z.digest = adler32.New() - return nil -} diff --git a/internal/zlib/writer.go b/internal/zlib/writer.go deleted file mode 100644 index 81c57f55..00000000 --- a/internal/zlib/writer.go +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package zlib - -import ( - "compress/flate" - "encoding/binary" - "fmt" - "hash" - "io" - "sync" - - "codeberg.org/lindenii/furgit/internal/adler32" -) - -// These constants are copied from the [flate] package, so that code that imports -// [compress/zlib] does not also have to import [compress/flate]. -const ( - NoCompression = flate.NoCompression - BestSpeed = flate.BestSpeed - BestCompression = flate.BestCompression - DefaultCompression = flate.DefaultCompression - HuffmanOnly = flate.HuffmanOnly -) - -// A Writer takes data written to it and writes the compressed -// form of that data to an underlying writer (see [NewWriter]). 
-type Writer struct { - w io.Writer - level int - dict []byte - compressor *flate.Writer - digest hash.Hash32 - err error - scratch [4]byte - wroteHeader bool -} - -var writerPool = sync.Pool{ - New: func() any { - return new(Writer) - }, -} - -// NewWriter creates a new [Writer]. -// Writes to the returned Writer are compressed and written to w. -// -// It is the caller's responsibility to call Close on the Writer when done. -// Writes may be buffered and not flushed until Close. -func NewWriter(w io.Writer) *Writer { - z, _ := NewWriterLevelDict(w, DefaultCompression, nil) - return z -} - -// NewWriterLevel is like [NewWriter] but specifies the compression level instead -// of assuming [DefaultCompression]. -// -// The compression level can be [DefaultCompression], [NoCompression], [HuffmanOnly] -// or any integer value between [BestSpeed] and [BestCompression] inclusive. -// The error returned will be nil if the level is valid. -func NewWriterLevel(w io.Writer, level int) (*Writer, error) { - return NewWriterLevelDict(w, level, nil) -} - -// NewWriterLevelDict is like [NewWriterLevel] but specifies a dictionary to -// compress with. -// -// The dictionary may be nil. If not, its contents should not be modified until -// the Writer is closed. -func NewWriterLevelDict(w io.Writer, level int, dict []byte) (*Writer, error) { - if level < HuffmanOnly || level > BestCompression { - return nil, fmt.Errorf("zlib: invalid compression level: %d", level) - } - v := writerPool.Get() - z, ok := v.(*Writer) - if !ok { - panic("zlib: pool returned unexpected type") - } - - // flate.Writer can only be Reset with the same level/dictionary mode. - // Reuse it only when the configuration is unchanged and dictionary-free. 
- reuseCompressor := z.compressor != nil && z.level == level && z.dict == nil && dict == nil - if !reuseCompressor { - z.compressor = nil - } - if z.digest != nil { - z.digest.Reset() - } - - *z = Writer{ - w: w, - level: level, - dict: dict, - compressor: z.compressor, - digest: z.digest, - } - if z.compressor != nil { - z.compressor.Reset(w) - } - return z, nil -} - -// Reset clears the state of the [Writer] z such that it is equivalent to its -// initial state from [NewWriterLevel] or [NewWriterLevelDict], but instead writing -// to w. -func (z *Writer) Reset(w io.Writer) { - z.w = w - // z.level and z.dict left unchanged. - if z.compressor != nil { - z.compressor.Reset(w) - } - if z.digest != nil { - z.digest.Reset() - } - z.err = nil - z.scratch = [4]byte{} - z.wroteHeader = false -} - -// writeHeader writes the ZLIB header. -func (z *Writer) writeHeader() (err error) { - z.wroteHeader = true - // ZLIB has a two-byte header (as documented in RFC 1950). - // The first four bits is the CINFO (compression info), which is 7 for the default deflate window size. - // The next four bits is the CM (compression method), which is 8 for deflate. - z.scratch[0] = 0x78 - // The next two bits is the FLEVEL (compression level). The four values are: - // 0=fastest, 1=fast, 2=default, 3=best. - // The next bit, FDICT, is set if a dictionary is given. - // The final five FCHECK bits form a mod-31 checksum. - switch z.level { - case -2, 0, 1: - z.scratch[1] = 0 << 6 - case 2, 3, 4, 5: - z.scratch[1] = 1 << 6 - case 6, -1: - z.scratch[1] = 2 << 6 - case 7, 8, 9: - z.scratch[1] = 3 << 6 - default: - panic("unreachable") - } - if z.dict != nil { - z.scratch[1] |= 1 << 5 - } - z.scratch[1] += uint8(31 - binary.BigEndian.Uint16(z.scratch[:2])%31) - if _, err = z.w.Write(z.scratch[0:2]); err != nil { - return err - } - if z.dict != nil { - // The next four bytes are the Adler-32 checksum of the dictionary. 
- binary.BigEndian.PutUint32(z.scratch[:], adler32.Checksum(z.dict)) - if _, err = z.w.Write(z.scratch[0:4]); err != nil { - return err - } - } - if z.compressor == nil { - // Initialize deflater unless the Writer is being reused - // after a Reset call. - z.compressor, err = flate.NewWriterDict(z.w, z.level, z.dict) - if err != nil { - return err - } - z.digest = adler32.New() - } - return nil -} - -// Write writes a compressed form of p to the underlying [io.Writer]. The -// compressed bytes are not necessarily flushed until the [Writer] is closed or -// explicitly flushed. -func (z *Writer) Write(p []byte) (n int, err error) { - if !z.wroteHeader { - z.err = z.writeHeader() - } - if z.err != nil { - return 0, z.err - } - if len(p) == 0 { - return 0, nil - } - n, err = z.compressor.Write(p) - if err != nil { - z.err = err - return - } - z.digest.Write(p) - return -} - -// Flush flushes the Writer to its underlying [io.Writer]. -func (z *Writer) Flush() error { - if !z.wroteHeader { - z.err = z.writeHeader() - } - if z.err != nil { - return z.err - } - z.err = z.compressor.Flush() - return z.err -} - -// Close closes the Writer, flushing any unwritten data to the underlying -// [io.Writer], but does not close the underlying io.Writer. -func (z *Writer) Close() error { - if !z.wroteHeader { - z.err = z.writeHeader() - } - if z.err != nil { - return z.err - } - z.err = z.compressor.Close() - if z.err != nil { - return z.err - } - checksum := z.digest.Sum32() - // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). - binary.BigEndian.PutUint32(z.scratch[:], checksum) - _, z.err = z.w.Write(z.scratch[0:4]) - if z.err != nil { - return z.err - } - - writerPool.Put(z) - return nil -} diff --git a/internal/zlibx/LICENSE b/internal/zlibx/LICENSE deleted file mode 100644 index 2a7cf70d..00000000 --- a/internal/zlibx/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright 2009 The Go Authors. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google LLC nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/internal/zlibx/constants.go b/internal/zlibx/constants.go deleted file mode 100644 index 161e3458..00000000 --- a/internal/zlibx/constants.go +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -/* -Package zlibx implements reading of zlib format compressed data, -as specified in RFC 1950. 
- -This package differs from the standard library's compress/zlib package -in that it pools readers to reduce allocations. Writing is unsupported. - -THis package will likely be refactorered much more for our specific -use case of only doing full decompressions to byte slices. - -Note that closing the reader causes it to be returned to a pool for -reuse. Therefore, the caller must not retain references to the -reader after closing it; in the standard library's compress/zlib package, -it is legal to Reset a closed reader and continue using it; that is -not allowed here, so there is simply no Resetter interface. - -The implementation provides filters that uncompress during reading -and compress during writing. For example, to write compressed data -to a buffer: - - var b bytes.Buffer - w := zlib.NewWriter(&b) - w.Write([]byte("hello, world\n")) - w.Close() - -and to read that data back: - - r, err := zlib.NewReader(&b) - io.Copy(os.Stdout, r) - r.Close() -*/ -package zlibx - -import ( - "errors" -) - -const ( - zlibDeflate = 8 - zlibMaxWindow = 7 -) - -var ( - // ErrChecksum is returned when reading ZLIB data that has an invalid checksum. - ErrChecksum = errors.New("zlib: invalid checksum") - // ErrHeader is returned when reading ZLIB data that has an invalid header. 
- ErrHeader = errors.New("zlib: invalid header") -) diff --git a/internal/zlibx/decompress.go b/internal/zlibx/decompress.go deleted file mode 100644 index 126c1fcb..00000000 --- a/internal/zlibx/decompress.go +++ /dev/null @@ -1,54 +0,0 @@ -package zlibx - -import ( - "encoding/binary" - "io" - - "codeberg.org/lindenii/furgit/internal/adler32" - "codeberg.org/lindenii/furgit/internal/bufpool" - "codeberg.org/lindenii/furgit/internal/flatex" -) - -func Decompress(src []byte) (bufpool.Buffer, error) { - out, _, err := DecompressSized(src, 0) - return out, err -} - -func DecompressSized(src []byte, sizeHint int) (buf bufpool.Buffer, consumed int, err error) { - if len(src) < 6 { - return bufpool.Buffer{}, 0, io.ErrUnexpectedEOF - } - - cmf := src[0] - flg := src[1] - if (cmf&0x0f != zlibDeflate) || (cmf>>4 > zlibMaxWindow) || (binary.BigEndian.Uint16(src[:2])%31 != 0) { - return bufpool.Buffer{}, 0, ErrHeader - } - - offset := 2 - if flg&0x20 != 0 { - return bufpool.Buffer{}, 0, ErrHeader - } - - if len(src[offset:]) < 4 { - return bufpool.Buffer{}, 0, io.ErrUnexpectedEOF - } - - deflateData := src[offset:] - out, consumed, err := flatex.DecompressSized(deflateData, sizeHint) - if err != nil { - return bufpool.Buffer{}, 0, err - } - - checksumPos := offset + consumed - if checksumPos+4 > len(src) { - out.Release() - return bufpool.Buffer{}, 0, io.ErrUnexpectedEOF - } - expected := binary.BigEndian.Uint32(src[checksumPos : checksumPos+4]) - if expected != adler32.Checksum(out.Bytes()) { - out.Release() - return bufpool.Buffer{}, 0, ErrChecksum - } - return out, checksumPos + 4, nil -} diff --git a/internal/zlibx/decompress_test.go b/internal/zlibx/decompress_test.go deleted file mode 100644 index bea348d2..00000000 --- a/internal/zlibx/decompress_test.go +++ /dev/null @@ -1,170 +0,0 @@ -package zlibx - -import ( - "bytes" - stdzlib "compress/zlib" - "crypto/rand" - "testing" -) - -func compressZlib(t *testing.T, payload []byte) []byte { - t.Helper() - var buf 
bytes.Buffer - w := stdzlib.NewWriter(&buf) - if _, err := w.Write(payload); err != nil { - t.Fatalf("Write: %v", err) - } - if err := w.Close(); err != nil { - t.Fatalf("Close: %v", err) - } - return buf.Bytes() -} - -func TestDecompress(t *testing.T) { - makeRand := func(n int) []byte { - b := make([]byte, n) - if _, err := rand.Read(b); err != nil { - t.Fatalf("rand.Read: %v", err) - } - return b - } - - type tc struct { - name string - payload []byte - } - - tests := []tc{ - { - name: "simple-hello", - payload: []byte("hello, zlib world!"), - }, - { - name: "empty", - payload: []byte{}, - }, - { - name: "single-byte", - payload: []byte{0x42}, - }, - { - name: "all-zero-1k", - payload: bytes.Repeat([]byte{0}, 1024), - }, - { - name: "all-FF-1k", - payload: bytes.Repeat([]byte{0xFF}, 1024), - }, - { - name: "ascii-repeated-pattern", - payload: bytes.Repeat([]byte("ABC123!"), 500), - }, - { - name: "binary-structured", - payload: []byte{ - 0x00, 0x01, 0x02, 0x03, - 0x10, 0x20, 0x30, 0x40, - 0xFF, 0xEE, 0xDD, 0xCC, - }, - }, - { - name: "1k-crypto-random", - payload: makeRand(1024), - }, - { - name: "32k-crypto-random", - payload: makeRand(32 * 1024), - }, - { - name: "256k-crypto-random", - payload: makeRand(256 * 1024), - }, - { - name: "highly-compressible-large", - payload: bytes.Repeat([]byte("AAAAAAAAAAAAAAAAAAAA"), 50_000), - }, - { - name: "json", - payload: []byte(`{"name":"test","values":[1,2,3,4],"deep":{"x":123,"y":"abc"}}`), - }, - { - name: "html", - payload: []byte("<html><body><h1>Title</h1><p>Paragraph</p></body></html>"), - }, - { - name: "alternating-binary-pattern", - payload: func() []byte { - b := make([]byte, 4096) - for i := 0; i < len(b); i++ { - if i%2 == 0 { - b[i] = 0xAA - } else { - b[i] = 0x55 - } - } - return b - }(), - }, - { - name: "large-repetitive-words", - payload: bytes.Repeat([]byte("the quick brown fox jumps over the lazy dog\n"), 4000), - }, - { - name: "unicode", - payload: []byte("我不知道该说点啥就随便打点字吧🤷♀️"), - }, - { - name: 
"multi-meg-random-2MB", - payload: makeRand(2 * 1024 * 1024), - }, - { - name: "multi-meg-random-16MB", - payload: makeRand(16 * 1024 * 1024), - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - compressed := compressZlib(t, tt.payload) - - out, err := Decompress(compressed) - if err != nil { - t.Fatalf("Decompress: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), tt.payload) { - t.Fatalf("payload mismatch: got %d bytes, want %d", len(out.Bytes()), len(tt.payload)) - } - }) - } -} - -func TestDecompressChecksumError(t *testing.T) { - payload := []byte("checksum check") - compressed := compressZlib(t, payload) - compressed[len(compressed)-1] ^= 0xff - - if _, err := Decompress(compressed); err != ErrChecksum { - t.Fatalf("expected ErrChecksum, got %v", err) - } -} - -func TestDecompressSizedUsesHint(t *testing.T) { - payload := []byte("tiny payload") - compressed := compressZlib(t, payload) - - const hint = 1 << 20 - out, _, err := DecompressSized(compressed, hint) - if err != nil { - t.Fatalf("DecompressSized: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), payload) { - t.Fatalf("unexpected payload %q", out.Bytes()) - } - if cap(out.Bytes()) < hint { - t.Fatalf("expected capacity >= %d, got %d", hint, cap(out.Bytes())) - } -} diff --git a/loose.go b/loose.go deleted file mode 100644 index 89779a93..00000000 --- a/loose.go +++ /dev/null @@ -1,209 +0,0 @@ -package furgit - -import ( - "bytes" - "fmt" - "io" - "os" - "path/filepath" - "strconv" - - "codeberg.org/lindenii/furgit/internal/bufpool" - "codeberg.org/lindenii/furgit/internal/zlib" - "codeberg.org/lindenii/furgit/internal/zlibx" -) - -const looseHeaderLimit = 4096 - -// loosePath returns the path for a loose object, validating hash size. 
-func (repo *Repository) loosePath(id Hash) (string, error) { - if id.algo != repo.hashAlgo { - return "", fmt.Errorf("furgit: hash algorithm mismatch: got %s, expected %s", id.algo.String(), repo.hashAlgo.String()) - } - hex := id.String() - return filepath.Join("objects", hex[:2], hex[2:]), nil -} - -func (repo *Repository) looseRead(id Hash) (ObjectType, bufpool.Buffer, error) { - ty, body, err := repo.looseReadTyped(id) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - return ty, body, nil -} - -func (repo *Repository) looseReadTyped(id Hash) (ObjectType, bufpool.Buffer, error) { - path, err := repo.loosePath(id) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - path = repo.repoPath(path) - f, err := os.Open(path) - if err != nil { - if os.IsNotExist(err) { - return ObjectTypeInvalid, bufpool.Buffer{}, ErrNotFound - } - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - defer func() { _ = f.Close() }() - - compressed, err := io.ReadAll(f) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - - raw, err := zlibx.Decompress(compressed) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - - rawBytes := raw.Bytes() - nul := bytes.IndexByte(rawBytes, 0) - if nul < 0 { - raw.Release() - return ObjectTypeInvalid, bufpool.Buffer{}, ErrInvalidObject - } - - header := rawBytes[:nul] - body := rawBytes[nul+1:] - - ty, declaredSize, err := parseLooseHeader(header) - if err != nil { - raw.Release() - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - if declaredSize != int64(len(body)) { - raw.Release() - return ObjectTypeInvalid, bufpool.Buffer{}, ErrInvalidObject - } - - copy(rawBytes, body) - raw.Resize(len(body)) - - return ty, raw, nil -} - -func (repo *Repository) looseTypeSize(id Hash) (ObjectType, int64, error) { - path, err := repo.loosePath(id) - if err != nil { - return ObjectTypeInvalid, 0, err - } - path = repo.repoPath(path) - // #nosec G304 - f, err := os.Open(path) 
- if err != nil { - if os.IsNotExist(err) { - return ObjectTypeInvalid, 0, ErrNotFound - } - return ObjectTypeInvalid, 0, err - } - defer func() { _ = f.Close() }() - - zr, err := zlib.NewReader(f) - if err != nil { - return ObjectTypeInvalid, 0, err - } - defer func() { _ = zr.Close() }() - - header := make([]byte, 0, 64) - chunk := make([]byte, 128) - for { - n, readErr := zr.Read(chunk) - if n > 0 { - data := chunk[:n] - if nul := bytes.IndexByte(data, 0); nul >= 0 { - header = append(header, data[:nul]...) - if len(header) > looseHeaderLimit { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - break - } - header = append(header, data...) - if len(header) > looseHeaderLimit { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - } - if readErr != nil { - if readErr == io.EOF { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - return ObjectTypeInvalid, 0, readErr - } - } - return parseLooseHeader(header) -} - -func parseLooseHeader(header []byte) (ObjectType, int64, error) { - space := bytes.IndexByte(header, ' ') - if space < 0 { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - ty, err := objTypeFromName(string(header[:space])) - if err != nil { - return ObjectTypeInvalid, 0, err - } - expect := header[space+1:] - if len(expect) == 0 { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - size, err := strconv.ParseInt(string(expect), 10, 64) - if err != nil { - return ObjectTypeInvalid, 0, fmt.Errorf("furgit: loose: size parse: %w", err) - } - if size < 0 { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - return ty, size, nil -} - -func objTypeFromName(name string) (ObjectType, error) { - switch name { - case objectTypeNameBlob: - return ObjectTypeBlob, nil - case objectTypeNameTree: - return ObjectTypeTree, nil - case objectTypeNameCommit: - return ObjectTypeCommit, nil - case objectTypeNameTag: - return ObjectTypeTag, nil - default: - return ObjectTypeInvalid, ErrInvalidObject - } -} - -// WriteLooseObject writes an object to the repository as 
a loose object. -func (repo *Repository) WriteLooseObject(obj Object) (Hash, error) { - if obj == nil { - return Hash{}, ErrInvalidObject - } - raw, err := obj.Serialize() - if err != nil { - return Hash{}, err - } - - id := repo.computeRawHash(raw) - path, err := repo.loosePath(id) - if err != nil { - return Hash{}, err - } - path = repo.repoPath(path) - - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return Hash{}, err - } - - var buf bytes.Buffer - zw := zlib.NewWriter(&buf) - if _, err := zw.Write(raw); err != nil { - return Hash{}, err - } - if err := zw.Close(); err != nil { - return Hash{}, err - } - - if err := os.WriteFile(path, buf.Bytes(), 0o644); err != nil { - return Hash{}, err - } - - return id, nil -} diff --git a/obj.go b/obj.go deleted file mode 100644 index f3ed8bfa..00000000 --- a/obj.go +++ /dev/null @@ -1,168 +0,0 @@ -package furgit - -import ( - "bytes" - "errors" - "fmt" - "strconv" -) - -// ObjectType mirrors Git's object type tags. -type ObjectType uint8 - -const ( - // An invalid object. - ObjectTypeInvalid ObjectType = 0 - // A commit object. - ObjectTypeCommit ObjectType = 1 - // A tree object. - ObjectTypeTree ObjectType = 2 - // A blob object. - ObjectTypeBlob ObjectType = 3 - // An annotated tag object. - ObjectTypeTag ObjectType = 4 - // An object type reserved for future use. - ObjectTypeFuture ObjectType = 5 - // A packfile offset delta object. This is not typically exposed. - ObjectTypeOfsDelta ObjectType = 6 - // A packfile reference delta object. This is not typically exposed. - ObjectTypeRefDelta ObjectType = 7 -) - -const ( - objectTypeNameBlob = "blob" - objectTypeNameTree = "tree" - objectTypeNameCommit = "commit" - objectTypeNameTag = "tag" -) - -// Object represents a Git object. -type Object interface { - // ObjectType returns the object's type. - ObjectType() ObjectType - // Serialize renders the object into its raw byte representation, - // including the header (i.e., "type size\0"). 
- Serialize() ([]byte, error) -} - -// StoredObject describes a Git object with a known hash, such as -// one read from storage. -type StoredObject interface { - Object - // Hash returns the object's hash. - Hash() Hash -} - -func headerForType(ty ObjectType, body []byte) ([]byte, error) { - var tyStr string - switch ty { - case ObjectTypeBlob: - tyStr = objectTypeNameBlob - case ObjectTypeTree: - tyStr = objectTypeNameTree - case ObjectTypeCommit: - tyStr = objectTypeNameCommit - case ObjectTypeTag: - tyStr = objectTypeNameTag - case ObjectTypeInvalid, ObjectTypeFuture, ObjectTypeOfsDelta, ObjectTypeRefDelta: - return nil, fmt.Errorf("furgit: object: unsupported type %d", ty) - default: - return nil, fmt.Errorf("furgit: object: unsupported type %d", ty) - } - size := strconv.Itoa(len(body)) - var buf bytes.Buffer - buf.Grow(len(tyStr) + len(size) + 1) - buf.WriteString(tyStr) - buf.WriteByte(' ') - buf.WriteString(size) - buf.WriteByte(0) - return buf.Bytes(), nil -} - -func parseObjectBody(ty ObjectType, id Hash, body []byte, repo *Repository) (StoredObject, error) { - switch ty { - case ObjectTypeBlob: - return parseBlob(id, body) - case ObjectTypeTree: - return parseTree(id, body, repo) - case ObjectTypeCommit: - return parseCommit(id, body, repo) - case ObjectTypeTag: - return parseTag(id, body, repo) - case ObjectTypeInvalid, ObjectTypeFuture, ObjectTypeOfsDelta, ObjectTypeRefDelta: - return nil, fmt.Errorf("furgit: object: unsupported type %d", ty) - default: - return nil, fmt.Errorf("furgit: object: unknown type %d", ty) - } -} - -// ReadObject resolves an ID. 
-func (repo *Repository) ReadObject(id Hash) (StoredObject, error) { - ty, body, err := repo.looseRead(id) - if err == nil { - obj, parseErr := parseObjectBody(ty, id, body.Bytes(), repo) - body.Release() - return obj, parseErr - } - if !errors.Is(err, ErrNotFound) { - return nil, err - } - ty, body, err = repo.packRead(id) - if errors.Is(err, ErrNotFound) { - return nil, ErrNotFound - } - if err != nil { - return nil, err - } - obj, parseErr := parseObjectBody(ty, id, body.Bytes(), repo) - body.Release() - return obj, parseErr -} - -// ReadObjectTypeRaw reads the object type and raw body. -func (repo *Repository) ReadObjectTypeRaw(id Hash) (ObjectType, []byte, error) { - ty, body, err := repo.looseRead(id) - if err == nil { - return ty, body.Bytes(), nil - } - if !errors.Is(err, ErrNotFound) { - return ObjectTypeInvalid, nil, err - } - ty, body, err = repo.packRead(id) - if errors.Is(err, ErrNotFound) { - return ObjectTypeInvalid, nil, ErrNotFound - } - if err != nil { - return ObjectTypeInvalid, nil, err - } - return ty, body.Bytes(), nil - // note to self: It always feels wrong to not call .Release in places like - // this but this is actually correct; we're returning the underlying buffer - // to the user who should not be aware of our internal buffer pooling. - // Releasing this buffer back to the pool would lead to a use-after-free; - // not releasing it as we do here, means it gets GC'ed. - // Copying into a newly allocated buffer is even worse as it incurs - // unnecessary copy overhead. -} - -// ReadObjectTypeSize reports the object type and size. -// -// Typicall, this is more efficient than reading the full object, -// as it avoids decompressing the entire object body. 
-func (repo *Repository) ReadObjectTypeSize(id Hash) (ObjectType, int64, error) { - ty, size, err := repo.looseTypeSize(id) - if err == nil { - return ty, size, nil - } - if !errors.Is(err, ErrNotFound) { - return ObjectTypeInvalid, 0, err - } - loc, err := repo.packIndexFind(id) - if err != nil { - if errors.Is(err, ErrNotFound) { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - return ObjectTypeInvalid, 0, err - } - return repo.packTypeSizeAtLocation(loc, nil) -} diff --git a/obj_blob.go b/obj_blob.go deleted file mode 100644 index 70115e6a..00000000 --- a/obj_blob.go +++ /dev/null @@ -1,49 +0,0 @@ -package furgit - -// Blob represents a Git blob object. -type Blob struct { - // Data represents the payload content of the blob. - Data []byte -} - -// StoredBlob represents a blob stored in the object database. -type StoredBlob struct { - Blob - hash Hash -} - -// Hash returns the hash of the stored blob. -func (sBlob *StoredBlob) Hash() Hash { - return sBlob.hash -} - -// ObjectType returns the object type of the blob. -// -// It always returns ObjectTypeBlob. -func (blob *Blob) ObjectType() ObjectType { - _ = blob - return ObjectTypeBlob -} - -func parseBlob(id Hash, body []byte) (*StoredBlob, error) { - data := append([]byte(nil), body...) - return &StoredBlob{ - hash: id, - Blob: Blob{ - Data: data, - }, - }, nil -} - -// Serialize renders the blob into its raw byte representation, -// including the header (i.e., "type size\0"). 
-func (blob *Blob) Serialize() ([]byte, error) { - header, err := headerForType(ObjectTypeBlob, blob.Data) - if err != nil { - return nil, err - } - raw := make([]byte, len(header)+len(blob.Data)) - copy(raw, header) - copy(raw[len(header):], blob.Data) - return raw, nil -} diff --git a/obj_blob_test.go b/obj_blob_test.go deleted file mode 100644 index 72ef0a23..00000000 --- a/obj_blob_test.go +++ /dev/null @@ -1,120 +0,0 @@ -package furgit - -import ( - "bytes" - "fmt" - "testing" -) - -func TestBlobRead(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - testData := []byte("Hello, Furgit!\nThis is test blob data.") - gitHash := gitHashObject(t, repoPath, "blob", testData) - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - hash, _ := repo.ParseHash(gitHash) - obj, err := repo.ReadObject(hash) - if err != nil { - t.Fatalf("ReadObject failed: %v", err) - } - - blob, ok := obj.(*StoredBlob) - if !ok { - t.Fatalf("expected *StoredBlob, got %T", obj) - } - - if !bytes.Equal(blob.Data, testData) { - t.Errorf("Data mismatch: got %q, want %q", blob.Data, testData) - } - if blob.Hash() != hash { - t.Errorf("Hash(): got %s, want %s", blob.Hash(), hash) - } - if blob.ObjectType() != ObjectTypeBlob { - t.Errorf("ObjectType(): got %d, want %d", blob.ObjectType(), ObjectTypeBlob) - } - - gitData := gitCatFile(t, repoPath, "blob", gitHash) - if !bytes.Equal(blob.Data, gitData) { - t.Error("furgit data doesn't match git data") - } -} - -func TestBlobWrite(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - testData := []byte("Test data written by furgit") - blob := &Blob{Data: testData} - - hash, err := repo.WriteLooseObject(blob) - if err != nil { - t.Fatalf("WriteLooseObject 
failed: %v", err) - } - - gitType := string(gitCatFile(t, repoPath, "-t", hash.String())) - if gitType != "blob" { - t.Errorf("git type: got %q, want %q", gitType, "blob") - } - - gitData := gitCatFile(t, repoPath, "blob", hash.String()) - if !bytes.Equal(gitData, testData) { - t.Error("git data doesn't match written data") - } - - gitSize := string(gitCatFile(t, repoPath, "-s", hash.String())) - if gitSize != fmt.Sprintf("%d", len(testData)) { - t.Errorf("git size: got %s, want %d", gitSize, len(testData)) - } -} - -func TestBlobRoundtrip(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - testData := []byte("Roundtrip test data") - blob := &Blob{Data: testData} - - hash, err := repo.WriteLooseObject(blob) - if err != nil { - t.Fatalf("WriteLooseObject failed: %v", err) - } - - obj, err := repo.ReadObject(hash) - if err != nil { - t.Fatalf("ReadObject failed: %v", err) - } - - readBlob, ok := obj.(*StoredBlob) - if !ok { - t.Fatalf("expected *StoredBlob, got %T", obj) - } - - if !bytes.Equal(readBlob.Data, testData) { - t.Error("roundtrip data mismatch") - } -} diff --git a/obj_commit.go b/obj_commit.go deleted file mode 100644 index d71754dd..00000000 --- a/obj_commit.go +++ /dev/null @@ -1,161 +0,0 @@ -package furgit - -import ( - "bytes" - "errors" - "fmt" -) - -// Commit represents a Git commit object. -type Commit struct { - // Tree represents the tree hash referenced by the commit. - Tree Hash - // Parents represents the parent commit hashes. - // Commits that have 0 parents are root commits. - // Commits that have >= 2 parents are merge commits. - Parents []Hash - // Author represents the author of the commit. - Author Ident - // Committer represents the committer of the commit. - Committer Ident - // Message represents the commit message. 
- Message []byte - // ChangeID represents the change-id header used by - // Gerrit and Jujutsu. - ChangeID string - // ExtraHeaders holds any extra headers present in the commit. - ExtraHeaders []ExtraHeader -} - -// StoredCommit represents a commit stored in the object database. -type StoredCommit struct { - Commit - hash Hash -} - -// Hash returns the hash of the stored commit. -func (sCommit *StoredCommit) Hash() Hash { - return sCommit.hash -} - -// ObjectType returns the object type of the commit. -// -// It always returns ObjectTypeCommit. -func (commit *Commit) ObjectType() ObjectType { - _ = commit - return ObjectTypeCommit -} - -func parseCommit(id Hash, body []byte, repo *Repository) (*StoredCommit, error) { - c := new(StoredCommit) - c.hash = id - i := 0 - for i < len(body) { - rel := bytes.IndexByte(body[i:], '\n') - if rel < 0 { - return nil, errors.New("furgit: commit: missing newline") - } - line := body[i : i+rel] - i += rel + 1 - if len(line) == 0 { - break - } - - switch { - case bytes.HasPrefix(line, []byte("tree ")): - treeID, err := repo.ParseHash(string(line[5:])) - if err != nil { - return nil, fmt.Errorf("furgit: commit: tree: %w", err) - } - c.Tree = treeID - case bytes.HasPrefix(line, []byte("parent ")): - parent, err := repo.ParseHash(string(line[7:])) - if err != nil { - return nil, fmt.Errorf("furgit: commit: parent: %w", err) - } - c.Parents = append(c.Parents, parent) - case bytes.HasPrefix(line, []byte("change-id ")): - c.ChangeID = string(line) - case bytes.HasPrefix(line, []byte("author ")): - idt, err := parseIdent(line[7:]) - if err != nil { - return nil, fmt.Errorf("furgit: commit: author: %w", err) - } - c.Author = *idt - case bytes.HasPrefix(line, []byte("committer ")): - idt, err := parseIdent(line[10:]) - if err != nil { - return nil, fmt.Errorf("furgit: commit: committer: %w", err) - } - c.Committer = *idt - case bytes.HasPrefix(line, []byte("gpgsig ")), bytes.HasPrefix(line, []byte("gpgsig-sha256 ")): - // TODO: handle 
this - for i < len(body) { - nextRel := bytes.IndexByte(body[i:], '\n') - if nextRel < 0 { - return nil, errors.New("furgit: commit: unterminated gpgsig") - } - if body[i] != ' ' { - break - } - i += nextRel + 1 - } - default: - key, value, found := bytes.Cut(line, []byte{' '}) - if !found { - return nil, errors.New("furgit: commit: malformed header") - } - c.ExtraHeaders = append(c.ExtraHeaders, ExtraHeader{Key: string(key), Value: value}) - } - } - - if i > len(body) { - return nil, ErrInvalidObject - } - - c.Message = append([]byte(nil), body[i:]...) - return c, nil -} - -func (commit *Commit) serialize() ([]byte, error) { - var buf bytes.Buffer - fmt.Fprintf(&buf, "tree %s\n", commit.Tree.String()) - for _, p := range commit.Parents { - fmt.Fprintf(&buf, "parent %s\n", p.String()) - } - buf.WriteString("author ") - ab, err := commit.Author.Serialize() - if err != nil { - return nil, err - } - buf.Write(ab) - buf.WriteByte('\n') - buf.WriteString("committer ") - cb, err := commit.Committer.Serialize() - if err != nil { - return nil, err - } - buf.Write(cb) - buf.WriteByte('\n') - buf.WriteByte('\n') - buf.Write(commit.Message) - - return buf.Bytes(), nil -} - -// Serialize renders the commit into its raw byte representation, -// including the header (i.e., "type size\0"). 
-func (commit *Commit) Serialize() ([]byte, error) { - body, err := commit.serialize() - if err != nil { - return nil, err - } - header, err := headerForType(ObjectTypeCommit, body) - if err != nil { - return nil, err - } - raw := make([]byte, len(header)+len(body)) - copy(raw, header) - copy(raw[len(header):], body) - return raw, nil -} diff --git a/obj_commit_test.go b/obj_commit_test.go deleted file mode 100644 index 939385d5..00000000 --- a/obj_commit_test.go +++ /dev/null @@ -1,188 +0,0 @@ -package furgit - -import ( - "bytes" - "fmt" - "os" - "path/filepath" - "testing" - "time" -) - -func TestCommitWrite(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - blobHash := gitHashObject(t, repoPath, "blob", []byte("content")) - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - blobHashObj, _ := repo.ParseHash(blobHash) - tree := &Tree{ - Entries: []TreeEntry{ - {Mode: 0o100644, Name: []byte("file.txt"), ID: blobHashObj}, - }, - } - treeHash, _ := repo.WriteLooseObject(tree) - - whenUnix := time.Date(2023, 11, 16, 12, 0, 0, 0, time.UTC).Unix() - commit := &Commit{ - Tree: treeHash, - Author: Ident{ - Name: []byte("Test Author"), - Email: []byte("test@example.org"), - WhenUnix: whenUnix, - OffsetMinutes: 0, - }, - Committer: Ident{ - Name: []byte("Test Committer"), - Email: []byte("committer@example.org"), - WhenUnix: whenUnix, - OffsetMinutes: 0, - }, - Message: []byte("Initial commit\n"), - } - - commitHash, err := repo.WriteLooseObject(commit) - if err != nil { - t.Fatalf("WriteLooseObject failed: %v", err) - } - - gitType := string(gitCatFile(t, repoPath, "-t", commitHash.String())) - if gitType != "commit" { - t.Errorf("git type: got %q, want %q", gitType, "commit") - } - - readObj, err := repo.ReadObject(commitHash) - if err != nil { - t.Fatalf("ReadObject failed after write: %v", err) - } - readCommit, ok := readObj.(*StoredCommit) 
- if !ok { - t.Fatalf("expected *StoredCommit, got %T", readObj) - } - - if !bytes.HasPrefix(readCommit.Author.Name, []byte("Test Author")) { - t.Errorf("author name: got %q, want prefix %q", readCommit.Author.Name, "Test Author") - } - if !bytes.Equal(readCommit.Message, []byte("Initial commit\n")) { - t.Errorf("message: got %q, want %q", readCommit.Message, "Initial commit\n") - } -} - -func TestCommitRead(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "file.txt"), []byte("content"), 0o644) - if err != nil { - t.Fatalf("failed to write file.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Test commit") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - hash, _ := repo.ParseHash(commitHash) - obj, err := repo.ReadObject(hash) - if err != nil { - t.Fatalf("ReadObject failed: %v", err) - } - - commit, ok := obj.(*StoredCommit) - if !ok { - t.Fatalf("expected *StoredCommit, got %T", obj) - } - - if !bytes.HasPrefix(commit.Author.Name, []byte("Test Author")) { - t.Errorf("author name: got %q", commit.Author.Name) - } - if !bytes.Equal(commit.Author.Email, []byte("test@example.org")) { - t.Errorf("author email: got %q", commit.Author.Email) - } - if !bytes.Equal(commit.Message, []byte("Test commit\n")) { - t.Errorf("message: got %q", commit.Message) - } - if commit.ObjectType() != ObjectTypeCommit { - t.Errorf("ObjectType(): got %d, want %d", commit.ObjectType(), ObjectTypeCommit) - } -} - -func TestCommitWithParents(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := 
os.WriteFile(filepath.Join(workDir, "file1.txt"), []byte("content1"), 0o644) - if err != nil { - t.Fatalf("failed to write file1.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "First commit") - parent1Hash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - err = os.WriteFile(filepath.Join(workDir, "file2.txt"), []byte("content2"), 0o644) - if err != nil { - t.Fatalf("failed to write file2.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Second commit") - parent2Hash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - err = os.WriteFile(filepath.Join(workDir, "file3.txt"), []byte("content3"), 0o644) - if err != nil { - t.Fatalf("failed to write file3.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - treeHash := gitCmd(t, repoPath, "--work-tree="+workDir, "write-tree") - - mergeCommitData := fmt.Sprintf("tree %s\nparent %s\nparent %s\nauthor Test Author <test@example.org> 1234567890 +0000\ncommitter Test Committer <committer@example.org> 1234567890 +0000\n\nMerge commit\n", - treeHash, parent1Hash, parent2Hash) - - cmd := gitHashObject(t, repoPath, "commit", []byte(mergeCommitData)) - mergeHash := cmd - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { - _ = repo.Close() - }() - - hash, _ := repo.ParseHash(mergeHash) - obj, _ := repo.ReadObject(hash) - commit := obj.(*StoredCommit) - - if len(commit.Parents) != 2 { - t.Fatalf("parents count: got %d, want 2", len(commit.Parents)) - } - - p1, _ := repo.ParseHash(parent1Hash) - p2, _ := repo.ParseHash(parent2Hash) - - if commit.Parents[0] != p1 { - t.Errorf("parent[0]: got %s, want %s", commit.Parents[0], parent1Hash) - } - if commit.Parents[1] != p2 { - t.Errorf("parent[1]: got %s, want %s", commit.Parents[1], parent2Hash) - } -} diff --git 
a/obj_tag.go b/obj_tag.go deleted file mode 100644 index b4502993..00000000 --- a/obj_tag.go +++ /dev/null @@ -1,169 +0,0 @@ -package furgit - -import ( - "bytes" - "errors" - "fmt" -) - -// Tag represents a Git annotated tag object. -type Tag struct { - // Target represents the hash of the object being tagged. - Target Hash - // TargetType represents the type of the object being tagged. - TargetType ObjectType - // Name represents the name of the tag. - Name []byte - // Tagger represents the identity of the tagger. - Tagger *Ident - // Message represents the tag message. - Message []byte -} - -// TODO: ExtraHeaders and signatures - -// StoredTag represents a tag stored in the object database. -type StoredTag struct { - Tag - hash Hash -} - -// Hash returns the hash of the stored tag. -func (sTag *StoredTag) Hash() Hash { - return sTag.hash -} - -// ObjectType returns the object type of the tag. -// -// It always returns ObjectTypeTag. -func (tag *Tag) ObjectType() ObjectType { - _ = tag - return ObjectTypeTag -} - -// parseTag parses a tag object body. 
-func parseTag(id Hash, body []byte, repo *Repository) (*StoredTag, error) { - t := new(StoredTag) - t.hash = id - i := 0 - var haveTarget, haveType bool - - for i < len(body) { - rel := bytes.IndexByte(body[i:], '\n') - if rel < 0 { - return nil, errors.New("furgit: tag: missing newline") - } - line := body[i : i+rel] - i += rel + 1 - if len(line) == 0 { - break - } - - switch { - case bytes.HasPrefix(line, []byte("object ")): - hash, err := repo.ParseHash(string(line[7:])) - if err != nil { - return nil, fmt.Errorf("furgit: tag: object: %w", err) - } - t.Target = hash - haveTarget = true - case bytes.HasPrefix(line, []byte("type ")): - switch string(line[5:]) { - case "commit": - t.TargetType = ObjectTypeCommit - case "tree": - t.TargetType = ObjectTypeTree - case "blob": - t.TargetType = ObjectTypeBlob - case "tag": - t.TargetType = ObjectTypeTag - default: - t.TargetType = ObjectTypeInvalid - return nil, errors.New("furgit: tag: unknown target type") - } - haveType = true - case bytes.HasPrefix(line, []byte("tag ")): - t.Name = append([]byte(nil), line[4:]...) - case bytes.HasPrefix(line, []byte("tagger ")): - idt, err := parseIdent(line[7:]) - if err != nil { - return nil, fmt.Errorf("furgit: tag: tagger: %w", err) - } - t.Tagger = idt - case bytes.HasPrefix(line, []byte("gpgsig ")), bytes.HasPrefix(line, []byte("gpgsig-sha256 ")): - for i < len(body) { - nextRel := bytes.IndexByte(body[i:], '\n') - if nextRel < 0 { - return nil, errors.New("furgit: tag: unterminated gpgsig") - } - if body[i] != ' ' { - break - } - i += nextRel + 1 - } - default: - // ignore unknown headers - } - } - - if !haveTarget || !haveType { - return nil, errors.New("furgit: tag: missing required headers") - } - - t.Message = append([]byte(nil), body[i:]...) 
- return t, nil -} - -func (tag *Tag) serialize() ([]byte, error) { - var buf bytes.Buffer - fmt.Fprintf(&buf, "object %s\n", tag.Target.String()) - buf.WriteString("type ") - switch tag.TargetType { - case ObjectTypeCommit: - buf.WriteString("commit") - case ObjectTypeTree: - buf.WriteString("tree") - case ObjectTypeBlob: - buf.WriteString("blob") - case ObjectTypeTag: - buf.WriteString("tag") - case ObjectTypeInvalid, ObjectTypeFuture, ObjectTypeOfsDelta, ObjectTypeRefDelta: - return nil, fmt.Errorf("furgit: tag: invalid target type %d", tag.TargetType) - default: - return nil, fmt.Errorf("furgit: tag: invalid target type %d", tag.TargetType) - } - buf.WriteByte('\n') - buf.WriteString("tag ") - buf.Write(tag.Name) - buf.WriteByte('\n') - if tag.Tagger != nil { - buf.WriteString("tagger ") - tb, err := tag.Tagger.Serialize() - if err != nil { - return nil, err - } - buf.Write(tb) - buf.WriteByte('\n') - } - buf.WriteByte('\n') - buf.Write(tag.Message) - - return buf.Bytes(), nil -} - -// Serialize renders the tag into its raw byte representation, -// including the header (i.e., "type size\0"). 
-func (tag *Tag) Serialize() ([]byte, error) { - body, err := tag.serialize() - if err != nil { - return nil, err - } - header, err := headerForType(ObjectTypeTag, body) - if err != nil { - return nil, err - } - raw := make([]byte, len(header)+len(body)) - copy(raw, header) - copy(raw[len(header):], body) - return raw, nil -} diff --git a/obj_tag_test.go b/obj_tag_test.go deleted file mode 100644 index 6b3c8368..00000000 --- a/obj_tag_test.go +++ /dev/null @@ -1,191 +0,0 @@ -package furgit - -import ( - "bytes" - "os" - "path/filepath" - "testing" - "time" -) - -func TestTagWrite(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "file.txt"), []byte("content"), 0o644) - if err != nil { - t.Fatalf("failed to write file.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Tagged commit") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - commitHashObj, _ := repo.ParseHash(commitHash) - - whenUnix := time.Now().Unix() - tag := &Tag{ - Target: commitHashObj, - TargetType: ObjectTypeCommit, - Name: []byte("v2.0.0"), - Tagger: &Ident{ - Name: []byte("Tagger Name"), - Email: []byte("tagger@test.org"), - WhenUnix: whenUnix, - OffsetMinutes: 120, - }, - Message: []byte("Release version 2.0.0\n"), - } - - tagHash, err := repo.WriteLooseObject(tag) - if err != nil { - t.Fatalf("WriteLooseObject failed: %v", err) - } - - gitType := string(gitCatFile(t, repoPath, "-t", tagHash.String())) - if gitType != "tag" { - t.Errorf("git type: got %q, want %q", gitType, "tag") - } - - readObj, err := repo.ReadObject(tagHash) - if err != nil { - t.Fatalf("ReadObject failed after write: %v", err) - } - readTag, ok := 
readObj.(*StoredTag) - if !ok { - t.Fatalf("expected *StoredTag, got %T", readObj) - } - - if !bytes.Equal(readTag.Name, []byte("v2.0.0")) { - t.Errorf("tag name: got %q, want %q", readTag.Name, "v2.0.0") - } - if !bytes.HasPrefix(readTag.Tagger.Name, []byte("Tagger Name")) { - t.Errorf("tagger name: got %q, want prefix %q", readTag.Tagger.Name, "Tagger Name") - } - if !bytes.Equal(readTag.Message, []byte("Release version 2.0.0\n")) { - t.Errorf("message: got %q, want %q", readTag.Message, "Release version 2.0.0\n") - } - - if tag.ObjectType() != ObjectTypeTag { - t.Errorf("ObjectType(): got %d, want %d", tag.ObjectType(), ObjectTypeTag) - } -} - -func TestTagRead(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "file.txt"), []byte("content"), 0o644) - if err != nil { - t.Fatalf("failed to write file.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Commit for tag") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - gitCmd(t, repoPath, "tag", "-a", "-m", "Tag message", "v1.0.0", commitHash) - tagHash := gitCmd(t, repoPath, "rev-parse", "v1.0.0") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hash, _ := repo.ParseHash(tagHash) - obj, err := repo.ReadObject(hash) - if err != nil { - t.Fatalf("ReadObject failed: %v", err) - } - - tag, ok := obj.(*StoredTag) - if !ok { - t.Fatalf("expected *StoredTag, got %T", obj) - } - - if !bytes.Equal(tag.Name, []byte("v1.0.0")) { - t.Errorf("name: got %q, want %q", tag.Name, "v1.0.0") - } - if tag.TargetType != ObjectTypeCommit { - t.Errorf("target type: got %d, want %d", tag.TargetType, ObjectTypeCommit) - } - if tag.Target.String() != commitHash { - t.Errorf("target: got %s, want %s", 
tag.Target, commitHash) - } -} - -func TestTagRoundtrip(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "file.txt"), []byte("content"), 0o644) - if err != nil { - t.Fatalf("failed to write file.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Commit") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - commitHashObj, _ := repo.ParseHash(commitHash) - - tag := &Tag{ - Target: commitHashObj, - TargetType: ObjectTypeCommit, - Name: []byte("v3.0.0"), - Tagger: &Ident{ - Name: []byte("Test Tagger"), - Email: []byte("tagger@example.org"), - WhenUnix: 123456789, - OffsetMinutes: 0, - }, - Message: []byte("Tag message\n"), - } - - tagHash, err := repo.WriteLooseObject(tag) - if err != nil { - t.Fatalf("WriteLooseObject failed: %v", err) - } - - obj, err := repo.ReadObject(tagHash) - if err != nil { - t.Fatalf("ReadObject failed: %v", err) - } - - readTag, ok := obj.(*StoredTag) - if !ok { - t.Fatalf("expected *StoredTag, got %T", obj) - } - - if !bytes.Equal(readTag.Name, tag.Name) { - t.Errorf("name: got %q, want %q", readTag.Name, tag.Name) - } - if readTag.Target != tag.Target { - t.Errorf("target: got %s, want %s", readTag.Target, tag.Target) - } - if readTag.TargetType != tag.TargetType { - t.Errorf("target type: got %d, want %d", readTag.TargetType, tag.TargetType) - } - if !bytes.Equal(readTag.Message, tag.Message) { - t.Errorf("message: got %q, want %q", readTag.Message, tag.Message) - } -} diff --git a/obj_test.go b/obj_test.go deleted file mode 100644 index 124127a5..00000000 --- a/obj_test.go +++ /dev/null @@ -1,52 +0,0 @@ -package furgit - -import ( - "fmt" - "testing" -) - -func 
TestObjectTypeSize(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - testData := []byte("Test data for size check") - gitHash := gitHashObject(t, repoPath, "blob", testData) - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hash, _ := repo.ParseHash(gitHash) - ty, size, err := repo.ReadObjectTypeSize(hash) - if err != nil { - t.Fatalf("ReadObjectTypeSize failed: %v", err) - } - - if ty != ObjectTypeBlob { - t.Errorf("type: got %d, want %d", ty, ObjectTypeBlob) - } - - gitSize := string(gitCatFile(t, repoPath, "-s", gitHash)) - if size != int64(len(testData)) || gitSize != fmt.Sprintf("%d", size) { - t.Errorf("size mismatch: furgit=%d git=%s expected=%d", size, gitSize, len(testData)) - } -} - -func TestReadObjectInvalid(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - invalidHash, _ := repo.ParseHash("0000000000000000000000000000000000000000000000000000000000000000") - _, err = repo.ReadObject(invalidHash) - if err == nil { - t.Error("expected error for invalid object") - } -} diff --git a/obj_tree.go b/obj_tree.go deleted file mode 100644 index 01b0651f..00000000 --- a/obj_tree.go +++ /dev/null @@ -1,310 +0,0 @@ -package furgit - -import ( - "bytes" - "errors" - "fmt" - "sort" - "strconv" -) - -// Tree represents a Git tree object. -type Tree struct { - // Entries represents the entries in the tree. - Entries []TreeEntry -} - -// StoredTree represents a tree stored in the object database. -type StoredTree struct { - Tree - hash Hash -} - -// Hash returns the hash of the stored tree. -func (sTree *StoredTree) Hash() Hash { - return sTree.hash -} - -// FileMode represents the mode of a file in a Git tree. 
-type FileMode uint32 - -const ( - // FileModeDir represents a directory (tree) in a Git tree. - FileModeDir FileMode = 0o40000 - // FileModeRegular represents a regular file (blob) in a Git tree. - FileModeRegular FileMode = 0o100644 - // FileModeExecutable represents an executable file (blob) in a Git tree. - FileModeExecutable FileMode = 0o100755 - // FileModeSymlink represents a symbolic link (blob) in a Git tree. - FileModeSymlink FileMode = 0o120000 - // FileModeGitlink represents a Git link (submodule) in a Git tree. - FileModeGitlink FileMode = 0o160000 -) - -// TreeEntry represents a single entry in a Git tree. -type TreeEntry struct { - // Mode represents the file mode of the entry. - Mode FileMode - // Name represents the name of the entry. - Name []byte - // ID represents the hash of the entry. This is typically - // either a blob or a tree. - ID Hash -} - -// ObjectType returns the object type of the tree. -// -// It always returns ObjectTypeTree. -func (tree *Tree) ObjectType() ObjectType { - _ = tree - return ObjectTypeTree -} - -// parseTree decodes a tree body. 
-func parseTree(id Hash, body []byte, repo *Repository) (*StoredTree, error) { - var entries []TreeEntry - i := 0 - for i < len(body) { - space := bytes.IndexByte(body[i:], ' ') - if space < 0 { - return nil, errors.New("furgit: tree: missing mode terminator") - } - modeBytes := body[i : i+space] - i += space + 1 - - nul := bytes.IndexByte(body[i:], 0) - if nul < 0 { - return nil, errors.New("furgit: tree: missing name terminator") - } - nameBytes := body[i : i+nul] - i += nul + 1 - - if i+repo.hashAlgo.Size() > len(body) { - return nil, errors.New("furgit: tree: truncated child hash") - } - var child Hash - copy(child.data[:], body[i:i+repo.hashAlgo.Size()]) - child.algo = repo.hashAlgo - i += repo.hashAlgo.Size() - - mode, err := strconv.ParseUint(string(modeBytes), 8, 32) - if err != nil { - return nil, fmt.Errorf("furgit: tree: parse mode: %w", err) - } - - entry := TreeEntry{ - Mode: FileMode(mode), - Name: append([]byte(nil), nameBytes...), - ID: child, - } - entries = append(entries, entry) - } - - return &StoredTree{ - hash: id, - Tree: Tree{ - Entries: entries, - }, - }, nil -} - -// treeBody builds the entry list for a tree without the Git header. -func (tree *Tree) serialize() []byte { - var bodyLen int - for _, e := range tree.Entries { - mode := strconv.FormatUint(uint64(e.Mode), 8) - bodyLen += len(mode) + 1 + len(e.Name) + 1 + e.ID.Size() - } - - body := make([]byte, bodyLen) - pos := 0 - for _, e := range tree.Entries { - mode := strconv.FormatUint(uint64(e.Mode), 8) - pos += copy(body[pos:], []byte(mode)) - body[pos] = ' ' - pos++ - pos += copy(body[pos:], e.Name) - body[pos] = 0 - pos++ - size := e.ID.Size() - pos += copy(body[pos:], e.ID.data[:size]) - } - - return body -} - -// Serialize renders the tree into its raw byte representation, -// including the header (i.e., "type size\0"). 
-func (tree *Tree) Serialize() ([]byte, error) { - body := tree.serialize() - header, err := headerForType(ObjectTypeTree, body) - if err != nil { - return nil, err - } - - raw := make([]byte, len(header)+len(body)) - copy(raw, header) - copy(raw[len(header):], body) - return raw, nil -} - -// Entry looks up a tree entry by name. -// -// Lookups are not recursive. -// It returns nil if no such entry exists. -func (tree *Tree) Entry(name []byte) *TreeEntry { - if len(tree.Entries) == 0 { - return nil - } - - if e := tree.entry(name, true); e != nil { - return e - } - - return tree.entry(name, false) -} - -// EntryRecursive looks up a tree entry by path. -// -// Lookups are recursive. -func (sTree *StoredTree) EntryRecursive(repo *Repository, path [][]byte) (*TreeEntry, error) { - if len(path) == 0 { - return nil, errors.New("furgit: tree: empty path") - } - - currentTree := sTree - for i, part := range path { - entry := currentTree.Entry(part) - if entry == nil { - return nil, ErrNotFound - } - if i == len(path)-1 { - return entry, nil - } - obj, err := repo.ReadObject(entry.ID) - if err != nil { - return nil, err - } - nextTree, ok := obj.(*StoredTree) - if !ok { - return nil, fmt.Errorf("furgit: tree: expected tree object at %s, got %T", part, obj) - // TODO: It may be useful to check the mode instead of reporting - // an object type error. - } - currentTree = nextTree - } - - return nil, ErrNotFound -} - -func (tree *Tree) entry(name []byte, searchIsTree bool) *TreeEntry { - low, high := 0, len(tree.Entries)-1 - for low <= high { - mid := low + (high-low)/2 - entry := &tree.Entries[mid] - - cmp := TreeEntryNameCompare(entry.Name, entry.Mode, name, searchIsTree) - if cmp == 0 { - if bytes.Equal(entry.Name, name) { - return entry - } - return nil - } - if cmp < 0 { - low = mid + 1 - } else { - high = mid - 1 - } - } - return nil -} - -// InsertEntry inserts a tree entry while preserving Git's name ordering. 
-// It returns an error if an entry with the same name already exists. -func (tree *Tree) InsertEntry(newEntry TreeEntry) error { - if tree == nil { - return ErrInvalidObject - } - for _, entry := range tree.Entries { - if bytes.Equal(entry.Name, newEntry.Name) { - return fmt.Errorf("furgit: tree: entry %q already exists", newEntry.Name) - } - } - newIsTree := newEntry.Mode == FileModeDir - insertAt := sort.Search(len(tree.Entries), func(i int) bool { - return TreeEntryNameCompare(tree.Entries[i].Name, tree.Entries[i].Mode, newEntry.Name, newIsTree) >= 0 - }) - tree.Entries = append(tree.Entries, TreeEntry{}) - copy(tree.Entries[insertAt+1:], tree.Entries[insertAt:]) - tree.Entries[insertAt] = newEntry - return nil -} - -// RemoveEntry removes a tree entry by name. -// It returns ErrNotFound if no matching entry exists. -func (tree *Tree) RemoveEntry(name []byte) error { - if tree == nil { - return ErrInvalidObject - } - if len(tree.Entries) == 0 { - return ErrNotFound - } - for i := range tree.Entries { - if bytes.Equal(tree.Entries[i].Name, name) { - copy(tree.Entries[i:], tree.Entries[i+1:]) - tree.Entries = tree.Entries[:len(tree.Entries)-1] - return nil - } - } - return ErrNotFound -} - -// TreeEntryNameCompare compares names using Git's tree ordering rules. 
-func TreeEntryNameCompare(entryName []byte, entryMode FileMode, searchName []byte, searchIsTree bool) int { - isEntryTree := entryMode == FileModeDir - - entryLen := len(entryName) - if isEntryTree { - entryLen++ - } - searchLen := len(searchName) - if searchIsTree { - searchLen++ - } - - n := entryLen - if searchLen < n { - n = searchLen - } - - for i := 0; i < n; i++ { - var ec, sc byte - - if i < len(entryName) { - ec = entryName[i] - } else { - ec = '/' - } - - if i < len(searchName) { - sc = searchName[i] - } else { - sc = '/' - } - - if ec < sc { - return -1 - } - if ec > sc { - return 1 - } - } - - if entryLen < searchLen { - return -1 - } - if entryLen > searchLen { - return 1 - } - return 0 -} diff --git a/obj_tree_test.go b/obj_tree_test.go deleted file mode 100644 index 605e9a15..00000000 --- a/obj_tree_test.go +++ /dev/null @@ -1,474 +0,0 @@ -package furgit - -import ( - "bytes" - "fmt" - "os" - "path/filepath" - "strings" - "testing" -) - -func TestTreeWrite(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - blobData := []byte("file content") - blobHash := gitHashObject(t, repoPath, "blob", blobData) - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - blobHashObj, _ := repo.ParseHash(blobHash) - tree := &Tree{ - Entries: []TreeEntry{ - {Mode: 0o100644, Name: []byte("file.txt"), ID: blobHashObj}, - }, - } - - treeHash, err := repo.WriteLooseObject(tree) - if err != nil { - t.Fatalf("WriteLooseObject failed: %v", err) - } - - gitType := string(gitCatFile(t, repoPath, "-t", treeHash.String())) - if gitType != "tree" { - t.Errorf("git type: got %q, want %q", gitType, "tree") - } - - gitLsTree := gitCmd(t, repoPath, "ls-tree", treeHash.String()) - if !strings.Contains(gitLsTree, "file.txt") { - t.Errorf("git ls-tree doesn't contain file.txt: %s", gitLsTree) - } - if !strings.Contains(gitLsTree, blobHash) { - t.Errorf("git ls-tree 
doesn't contain blob hash: %s", gitLsTree) - } -} - -func TestTreeRead(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "a.txt"), []byte("content a"), 0o644) - if err != nil { - t.Fatalf("failed to write a.txt: %v", err) - } - err = os.WriteFile(filepath.Join(workDir, "b.txt"), []byte("content b"), 0o644) - if err != nil { - t.Fatalf("failed to write b.txt: %v", err) - } - err = os.WriteFile(filepath.Join(workDir, "c.txt"), []byte("content c"), 0o644) - if err != nil { - t.Fatalf("failed to write c.txt: %v", err) - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - treeHash := gitCmd(t, repoPath, "--work-tree="+workDir, "write-tree") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hash, _ := repo.ParseHash(treeHash) - obj, err := repo.ReadObject(hash) - if err != nil { - t.Fatalf("ReadObject failed: %v", err) - } - - tree, ok := obj.(*StoredTree) - if !ok { - t.Fatalf("expected *StoredTree, got %T", obj) - } - - if len(tree.Entries) != 3 { - t.Fatalf("entries count: got %d, want 3", len(tree.Entries)) - } - - expectedNames := []string{"a.txt", "b.txt", "c.txt"} - for i, expected := range expectedNames { - if string(tree.Entries[i].Name) != expected { - t.Errorf("entry[%d] name: got %q, want %q", i, tree.Entries[i].Name, expected) - } - } - - if tree.ObjectType() != ObjectTypeTree { - t.Errorf("ObjectType(): got %d, want %d", tree.ObjectType(), ObjectTypeTree) - } -} - -func TestTreeEntry(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "a.txt"), []byte("content a"), 0o644) - if err != nil { - t.Fatalf("failed to write a.txt: %v", err) - } - err = 
os.WriteFile(filepath.Join(workDir, "b.txt"), []byte("content b"), 0o644) - if err != nil { - t.Fatalf("failed to write b.txt: %v", err) - } - err = os.WriteFile(filepath.Join(workDir, "c.txt"), []byte("content c"), 0o644) - if err != nil { - t.Fatalf("failed to write c.txt: %v", err) - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - treeHash := gitCmd(t, repoPath, "--work-tree="+workDir, "write-tree") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hash, _ := repo.ParseHash(treeHash) - obj, _ := repo.ReadObject(hash) - tree := obj.(*StoredTree) - - entry := tree.Entry([]byte("b.txt")) - if entry == nil { - t.Fatal("Entry returned nil for existing entry") - } - if !bytes.Equal(entry.Name, []byte("b.txt")) { - t.Errorf("entry name: got %q, want %q", entry.Name, "b.txt") - } - - notFound := tree.Entry([]byte("notfound.txt")) - if notFound != nil { - t.Error("Entry returned non-nil for non-existing entry") - } -} - -func TestTreeEntryRecursive(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.MkdirAll(filepath.Join(workDir, "dir"), 0o755) - if err != nil { - t.Fatalf("failed to create dir: %v", err) - } - err = os.WriteFile(filepath.Join(workDir, "file1.txt"), []byte("file1"), 0o644) - if err != nil { - t.Fatalf("failed to write file1.txt: %v", err) - } - err = os.WriteFile(filepath.Join(workDir, "file2.txt"), []byte("file2"), 0o644) - if err != nil { - t.Fatalf("failed to write file2.txt: %v", err) - } - err = os.WriteFile(filepath.Join(workDir, "dir", "nested.txt"), []byte("nested"), 0o644) - if err != nil { - t.Fatalf("failed to write dir/nested.txt: %v", err) - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - treeHash := gitCmd(t, repoPath, "--work-tree="+workDir, "write-tree") - - repo, err := OpenRepository(repoPath) - if err != 
nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hash, _ := repo.ParseHash(treeHash) - obj, _ := repo.ReadObject(hash) - tree := obj.(*StoredTree) - - entry, err := tree.EntryRecursive(repo, [][]byte{[]byte("file1.txt")}) - if err != nil { - t.Fatalf("EntryRecursive file1.txt failed: %v", err) - } - if !bytes.Equal(entry.Name, []byte("file1.txt")) { - t.Errorf("entry name: got %q, want %q", entry.Name, "file1.txt") - } - - gitShow := string(gitCatFile(t, repoPath, "blob", entry.ID.String())) - if gitShow != "file1" { - t.Errorf("file1 content from git: got %q, want %q", gitShow, "file1") - } - - nestedEntry, err := tree.EntryRecursive(repo, [][]byte{[]byte("dir"), []byte("nested.txt")}) - if err != nil { - t.Fatalf("EntryRecursive dir/nested.txt failed: %v", err) - } - if !bytes.Equal(nestedEntry.Name, []byte("nested.txt")) { - t.Errorf("nested entry name: got %q, want %q", nestedEntry.Name, "nested.txt") - } - - gitShowNested := string(gitCatFile(t, repoPath, "blob", nestedEntry.ID.String())) - if gitShowNested != "nested" { - t.Errorf("nested content from git: got %q, want %q", gitShowNested, "nested") - } - - _, err = tree.EntryRecursive(repo, [][]byte{[]byte("nonexistent.txt")}) - if err == nil { - t.Error("expected error for nonexistent path") - } - - _, err = tree.EntryRecursive(repo, [][]byte{}) - if err == nil { - t.Error("expected error for empty path") - } -} - -func TestTreeLarge(t *testing.T) { - if testing.Short() { - t.Skip("skipping large tree test in short mode") - } - - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitCmd(t, repoPath, "config", "gc.auto", "0") - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - numFiles := 1000 - for i := 0; i < numFiles; i++ { - filename := filepath.Join(workDir, fmt.Sprintf("file%04d.txt", i)) - content := fmt.Sprintf("Content for file %d\n", i) - err := os.WriteFile(filename, []byte(content), 0o644) - if err != nil { - 
t.Fatalf("failed to write %s: %v", filename, err) - } - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - treeHash := gitCmd(t, repoPath, "--work-tree="+workDir, "write-tree") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hash, _ := repo.ParseHash(treeHash) - obj, _ := repo.ReadObject(hash) - tree := obj.(*StoredTree) - - if len(tree.Entries) != numFiles { - t.Errorf("tree entries: got %d, want %d", len(tree.Entries), numFiles) - } - - gitCount := gitCmd(t, repoPath, "ls-tree", treeHash) - gitLines := strings.Count(gitCount, "\n") + 1 - if len(tree.Entries) != gitLines { - t.Errorf("furgit found %d entries, git found %d", len(tree.Entries), gitLines) - } - - for i := 0; i < 10; i++ { - idx := i * (numFiles / 10) - expectedName := fmt.Sprintf("file%04d.txt", idx) - entry := tree.Entry([]byte(expectedName)) - if entry == nil { - t.Errorf("expected to find entry %s", expectedName) - continue - } - - blobObj, _ := repo.ReadObject(entry.ID) - blob := blobObj.(*StoredBlob) - - expectedContent := fmt.Sprintf("Content for file %d\n", idx) - if string(blob.Data) != expectedContent { - t.Errorf("blob %s: got %q, want %q", expectedName, blob.Data, expectedContent) - } - - gitData := gitCatFile(t, repoPath, "blob", entry.ID.String()) - if !bytes.Equal(blob.Data, gitData) { - t.Errorf("blob %s: furgit data doesn't match git data", expectedName) - } - } -} - -func TestTreeInsertEntry(t *testing.T) { - tree := &Tree{ - Entries: []TreeEntry{ - {Mode: FileModeRegular, Name: []byte("alpha"), ID: Hash{}}, - {Mode: FileModeRegular, Name: []byte("gamma"), ID: Hash{}}, - }, - } - - if err := tree.InsertEntry(TreeEntry{Mode: FileModeRegular, Name: []byte("beta"), ID: Hash{}}); err != nil { - t.Fatalf("InsertEntry failed: %v", err) - } - if len(tree.Entries) != 3 { - t.Fatalf("entries count: got %d, want 3", len(tree.Entries)) - } - if string(tree.Entries[1].Name) != 
"beta" { - t.Fatalf("inserted order mismatch: got %q, want %q", tree.Entries[1].Name, "beta") - } - - if err := tree.InsertEntry(TreeEntry{Mode: FileModeRegular, Name: []byte("beta"), ID: Hash{}}); err == nil { - t.Fatal("expected duplicate insert error") - } - - var nilTree *Tree - if err := nilTree.InsertEntry(TreeEntry{Mode: FileModeRegular, Name: []byte("x"), ID: Hash{}}); err == nil { - t.Fatal("expected error for nil tree") - } -} - -func TestTreeRemoveEntry(t *testing.T) { - tree := &Tree{ - Entries: []TreeEntry{ - {Mode: FileModeRegular, Name: []byte("alpha"), ID: Hash{}}, - {Mode: FileModeRegular, Name: []byte("beta"), ID: Hash{}}, - {Mode: FileModeRegular, Name: []byte("gamma"), ID: Hash{}}, - }, - } - - if err := tree.RemoveEntry([]byte("beta")); err != nil { - t.Fatalf("RemoveEntry failed: %v", err) - } - if len(tree.Entries) != 2 { - t.Fatalf("entries count: got %d, want 2", len(tree.Entries)) - } - if string(tree.Entries[0].Name) != "alpha" || string(tree.Entries[1].Name) != "gamma" { - t.Fatalf("remove order mismatch: got %q, %q", tree.Entries[0].Name, tree.Entries[1].Name) - } - - if err := tree.RemoveEntry([]byte("beta")); err == nil { - t.Fatal("expected ErrNotFound for missing entry") - } - - var nilTree *Tree - if err := nilTree.RemoveEntry([]byte("alpha")); err == nil { - t.Fatal("expected error for nil tree") - } -} - -func TestTreeEntryNameCompare(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - entryName []byte - entryMode FileMode - searchName []byte - searchIsTree bool - want int - }{ - { - name: "equal file names", - entryName: []byte("alpha"), - entryMode: FileModeRegular, - searchName: []byte("alpha"), - want: 0, - }, - { - name: "equal tree names", - entryName: []byte("dir"), - entryMode: FileModeDir, - searchName: []byte("dir"), - searchIsTree: true, - want: 0, - }, - { - name: "lexicographic less", - entryName: []byte("alpha"), - entryMode: FileModeRegular, - searchName: []byte("beta"), - want: -1, - }, - { - 
name: "lexicographic greater", - entryName: []byte("gamma"), - entryMode: FileModeRegular, - searchName: []byte("beta"), - want: 1, - }, - { - name: "file sorts before same-name dir", - entryName: []byte("same"), - entryMode: FileModeRegular, - searchName: []byte("same"), - searchIsTree: true, - want: -1, - }, - { - name: "dir sorts after same-name file", - entryName: []byte("same"), - entryMode: FileModeDir, - searchName: []byte("same"), - searchIsTree: false, - want: 1, - }, - { - name: "dir sorts before longer file", - entryName: []byte("a"), - entryMode: FileModeDir, - searchName: []byte("ab"), - searchIsTree: false, - want: -1, - }, - { - name: "file sorts before longer file", - entryName: []byte("a"), - entryMode: FileModeRegular, - searchName: []byte("ab"), - want: -1, - }, - { - name: "search tree compares after exact file name", - entryName: []byte("a"), - entryMode: FileModeRegular, - searchName: []byte("a"), - searchIsTree: true, - want: -1, - }, - { - name: "entry tree compares after exact search file", - entryName: []byte("a"), - entryMode: FileModeDir, - searchName: []byte("a"), - searchIsTree: false, - want: 1, - }, - { - name: "slash impact mid-compare", - entryName: []byte("a"), - entryMode: FileModeDir, - searchName: []byte("a0"), - searchIsTree: false, - want: -1, - }, - { - name: "file sorts after same prefix dir", - entryName: []byte("a0"), - entryMode: FileModeRegular, - searchName: []byte("a"), - searchIsTree: true, - want: 1, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - got := TreeEntryNameCompare(tt.entryName, tt.entryMode, tt.searchName, tt.searchIsTree) - if got < 0 { - got = -1 - } else if got > 0 { - got = 1 - } - if got != tt.want { - t.Fatalf("compare(%q,%v,%q,%v) = %d, want %d", tt.entryName, tt.entryMode, tt.searchName, tt.searchIsTree, got, tt.want) - } - }) - } -} diff --git a/pack_idx_read.go b/pack_idx_read.go deleted file mode 100644 index 0dbb9bcf..00000000 --- a/pack_idx_read.go 
+++ /dev/null @@ -1,290 +0,0 @@ -package furgit - -import ( - "bytes" - "errors" - "fmt" - "os" - "path/filepath" - "strings" - "sync" - "syscall" -) - -const ( - idxMagic = 0xff744f63 - idxVersion2 = 2 -) - -type packIndex struct { - repo *Repository - idxRel string - packPath string - - loadOnce sync.Once - loadErr error - - numObjects int - fanout []byte - names []byte - crcs []byte - offset32 []byte - offset64 []byte - data []byte - - closeOnce sync.Once -} - -func (pi *packIndex) Close() error { - if pi == nil { - return nil - } - var closeErr error - pi.closeOnce.Do(func() { - if len(pi.data) > 0 { - if err := syscall.Munmap(pi.data); closeErr == nil { - closeErr = err - } - pi.data = nil - pi.fanout = nil - pi.names = nil - pi.crcs = nil - pi.offset32 = nil - pi.offset64 = nil - pi.numObjects = 0 - } - }) - return closeErr -} - -func (pi *packIndex) ensureLoaded() error { - pi.loadOnce.Do(func() { - pi.loadErr = pi.load() - }) - return pi.loadErr -} - -func (pi *packIndex) load() error { - if pi.repo == nil { - return ErrInvalidObject - } - f, err := os.Open(pi.repo.repoPath(pi.idxRel)) - if err != nil { - return err - } - stat, err := f.Stat() - if err != nil { - _ = f.Close() - return err - } - if stat.Size() < 8+256*4 { - _ = f.Close() - return ErrInvalidObject - } - region, err := syscall.Mmap( - int(f.Fd()), - 0, - int(stat.Size()), - syscall.PROT_READ, - syscall.MAP_PRIVATE, - ) - if err != nil { - _ = f.Close() - return err - } - err = f.Close() - if err != nil { - _ = syscall.Munmap(region) - return err - } - err = pi.parse(region) - if err != nil { - _ = syscall.Munmap(region) - return err - } - pi.data = region - return nil -} - -func (repo *Repository) packIndexes() ([]*packIndex, error) { - repo.packIdxOnce.Do(func() { - repo.packIdx, repo.packIdxErr = repo.loadPackIndexes() - }) - return repo.packIdx, repo.packIdxErr -} - -func (repo *Repository) loadPackIndexes() ([]*packIndex, error) { - dir := filepath.Join(repo.rootPath, "objects", "pack") - 
entries, err := os.ReadDir(dir) - if err != nil { - if os.IsNotExist(err) { - return nil, ErrNotFound - } - return nil, err - } - - idxs := make([]*packIndex, 0, len(entries)) - for _, entry := range entries { - if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".idx") { - continue - } - rel := filepath.Join("objects", "pack", entry.Name()) - packRel := strings.TrimSuffix(rel, ".idx") + ".pack" - idxs = append(idxs, &packIndex{ - repo: repo, - idxRel: rel, - packPath: packRel, - }) - } - if len(idxs) == 0 { - return nil, ErrNotFound - } - return idxs, nil -} - -func (pi *packIndex) parse(buf []byte) error { - if len(buf) < 8+256*4 { - return ErrInvalidObject - } - if readBE32(buf[0:4]) != idxMagic { - return ErrInvalidObject - } - if readBE32(buf[4:8]) != idxVersion2 { - return ErrInvalidObject - } - - const fanoutBytes = 256 * 4 - fanoutStart := 8 - fanoutEnd := fanoutStart + fanoutBytes - if fanoutEnd > len(buf) { - return ErrInvalidObject - } - pi.fanout = buf[fanoutStart:fanoutEnd] - nobj := int(readBE32(pi.fanout[len(pi.fanout)-4:])) - - namesStart := fanoutEnd - namesEnd := namesStart + nobj*pi.repo.hashAlgo.Size() - if namesEnd > len(buf) { - return ErrInvalidObject - } - - crcStart := namesEnd - crcEnd := crcStart + nobj*4 - if crcEnd > len(buf) { - return ErrInvalidObject - } - - off32Start := crcEnd - off32End := off32Start + nobj*4 - if off32End > len(buf) { - return ErrInvalidObject - } - - pi.offset32 = buf[off32Start:off32End] - - off64Start := off32End - trailerStart := len(buf) - 2*pi.repo.hashAlgo.Size() - if trailerStart < off64Start { - return ErrInvalidObject - } - if (trailerStart-off64Start)%8 != 0 { - return ErrInvalidObject - } - off64End := trailerStart - pi.offset64 = buf[off64Start:off64End] - - pi.numObjects = nobj - pi.names = buf[namesStart:namesEnd] - pi.crcs = buf[crcStart:crcEnd] - return nil -} - -func readBE32(b []byte) uint32 { - _ = b[3] - return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3]) -} - -func 
readBE64(b []byte) uint64 { - _ = b[7] - return (uint64(b[0]) << 56) | (uint64(b[1]) << 48) | - (uint64(b[2]) << 40) | (uint64(b[3]) << 32) | - (uint64(b[4]) << 24) | (uint64(b[5]) << 16) | - (uint64(b[6]) << 8) | uint64(b[7]) -} - -func (pi *packIndex) fanoutEntry(i int) uint32 { - if len(pi.fanout) == 0 { - return 0 - } - entries := len(pi.fanout) / 4 - if i < 0 || i >= entries { - return 0 - } - start := i * 4 - return readBE32(pi.fanout[start : start+4]) -} - -func (pi *packIndex) offset(idx int) (uint64, error) { - start := idx * 4 - word := readBE32(pi.offset32[start : start+4]) - if word&0x80000000 == 0 { - return uint64(word), nil - } - pos := int(word & 0x7fffffff) - entries := len(pi.offset64) / 8 - if pos < 0 || pos >= entries { - return 0, errors.New("furgit: pack: corrupt 64-bit offset table") - } - base := pos * 8 - return readBE64(pi.offset64[base : base+8]), nil -} - -func (pi *packIndex) lookup(id Hash) (packlocation, error) { - err := pi.ensureLoaded() - if err != nil { - return packlocation{}, err - } - if id.algo != pi.repo.hashAlgo { - return packlocation{}, fmt.Errorf("furgit: hash algorithm mismatch: got %s, expected %s", id.algo.String(), pi.repo.hashAlgo.String()) - } - first := int(id.data[0]) - var lo int - if first > 0 { - lo = int(pi.fanoutEntry(first - 1)) - } - hi := int(pi.fanoutEntry(first)) - idx, found := bsearchHash(pi.names, pi.repo.hashAlgo.Size(), lo, hi, id) - if !found { - return packlocation{}, ErrNotFound - } - ofs, err := pi.offset(idx) - if err != nil { - return packlocation{}, err - } - return packlocation{ - PackPath: pi.packPath, - Offset: ofs, - }, nil -} - -func bsearchHash(names []byte, stride, lo, hi int, want Hash) (int, bool) { - for lo < hi { - mid := lo + (hi-lo)/2 - cmp := compareHash(names, stride, mid, want.data[:stride]) - if cmp == 0 { - return mid, true - } - if cmp > 0 { - hi = mid - } else { - lo = mid + 1 - } - } - return lo, false -} - -func compareHash(names []byte, stride, idx int, want []byte) int 
{ - base := idx * stride - end := base + stride - return bytes.Compare(names[base:end], want) -} diff --git a/pack_pack_read.go b/pack_pack_read.go deleted file mode 100644 index 56098ee5..00000000 --- a/pack_pack_read.go +++ /dev/null @@ -1,578 +0,0 @@ -package furgit - -import ( - "encoding/binary" - "errors" - "io" - "os" - "sync" - "syscall" - - "codeberg.org/lindenii/furgit/internal/bufpool" - "codeberg.org/lindenii/furgit/internal/zlibx" -) - -const ( - packMagic = 0x5041434b - packVersion2 = 2 -) - -type packlocation struct { - PackPath string - Offset uint64 -} - -func (repo *Repository) packRead(id Hash) (ObjectType, bufpool.Buffer, error) { - loc, err := repo.packIndexFind(id) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - return repo.packReadAt(loc, id) -} - -func (repo *Repository) packIndexFind(id Hash) (packlocation, error) { - idxs, err := repo.packIndexes() - if err != nil { - return packlocation{}, err - } - for _, idx := range idxs { - loc, err := idx.lookup(id) - if errors.Is(err, ErrNotFound) { - continue - } - if err != nil { - return packlocation{}, err - } - return loc, nil - } - return packlocation{}, ErrNotFound -} - -func (repo *Repository) packReadAt(loc packlocation, want Hash) (ObjectType, bufpool.Buffer, error) { - ty, body, err := repo.packBodyResolveAtLocation(loc) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - return ty, body, nil -} - -func (repo *Repository) packBodyResolveAtLocation(loc packlocation) (ObjectType, bufpool.Buffer, error) { - pf, err := repo.packFile(loc.PackPath) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - return repo.packBodyResolveWithin(pf, loc.Offset) -} - -func (repo *Repository) packTypeSizeAtLocation(loc packlocation, seen map[packKey]struct{}) (ObjectType, int64, error) { - pf, err := repo.packFile(loc.PackPath) - if err != nil { - return ObjectTypeInvalid, 0, err - } - return repo.packTypeSizeWithin(pf, loc.Offset, seen) -} 
- -func packHeaderParse(data []byte) (ObjectType, int, int, error) { - if len(data) == 0 { - return ObjectTypeInvalid, 0, 0, io.ErrUnexpectedEOF - } - b := data[0] - ty := ObjectType((b >> 4) & 0x07) - size := int(b & 0x0f) - shift := 4 - consumed := 1 - for (b & 0x80) != 0 { - if consumed >= len(data) { - return ObjectTypeInvalid, 0, 0, io.ErrUnexpectedEOF - } - b = data[consumed] - size |= int(b&0x7f) << shift - shift += 7 - consumed++ - } - return ty, size, consumed, nil -} - -func packSectionInflate(pf *packFile, start uint64, sizeHint int) (bufpool.Buffer, error) { - if start > uint64(len(pf.data)) { - return bufpool.Buffer{}, ErrInvalidObject - } - body, _, err := zlibx.DecompressSized(pf.data[start:], sizeHint) - if err != nil { - return bufpool.Buffer{}, err - } - if sizeHint > 0 && len(body.Bytes()) != sizeHint { - body.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - return body, nil -} - -func packDeltaReadOfsDistance(data []byte) (uint64, int, error) { - if len(data) == 0 { - return 0, 0, io.ErrUnexpectedEOF - } - b := data[0] - dist := uint64(b & 0x7f) - consumed := 1 - for (b & 0x80) != 0 { - if consumed >= len(data) { - return 0, 0, io.ErrUnexpectedEOF - } - b = data[consumed] - consumed++ - dist = ((dist + 1) << 7) + uint64(b&0x7f) - } - return dist, consumed, nil -} - -type packKey struct { - path string - ofs uint64 -} - -func (repo *Repository) packTypeSizeWithin(pf *packFile, ofs uint64, seen map[packKey]struct{}) (ObjectType, int64, error) { - if pf == nil { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - if seen == nil { - seen = make(map[packKey]struct{}) - } - var visited []packKey - defer func() { - for _, key := range visited { - delete(seen, key) - } - }() - - var declaredSize int64 - firstHeader := true - - for { - key := packKey{path: pf.relPath, ofs: ofs} - if _, dup := seen[key]; dup { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - seen[key] = struct{}{} - visited = append(visited, key) - - if ofs >= 
uint64(len(pf.data)) { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - ty, size, consumed, err := packHeaderParse(pf.data[ofs:]) - if err != nil { - return ObjectTypeInvalid, 0, err - } - if firstHeader { - declaredSize = int64(size) - firstHeader = false - } - - if uint64(consumed) > uint64(len(pf.data))-ofs { - return ObjectTypeInvalid, 0, io.ErrUnexpectedEOF - } - dataStart := ofs + uint64(consumed) - switch ty { - case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: - return ty, declaredSize, nil - case ObjectTypeRefDelta: - hashEnd := dataStart + uint64(repo.hashAlgo.Size()) - if hashEnd > uint64(len(pf.data)) { - return ObjectTypeInvalid, 0, io.ErrUnexpectedEOF - } - var base Hash - copy(base.data[:], pf.data[dataStart:hashEnd]) - base.algo = repo.hashAlgo - loc, err := repo.packIndexFind(base) - if err == nil { - pf, err = repo.packFile(loc.PackPath) - if err != nil { - return ObjectTypeInvalid, 0, err - } - ofs = loc.Offset - continue - } - if !errors.Is(err, ErrNotFound) { - return ObjectTypeInvalid, 0, err - } - baseTy, _, err := repo.looseTypeSize(base) - if err != nil { - return ObjectTypeInvalid, 0, err - } - return baseTy, declaredSize, nil - case ObjectTypeOfsDelta: - dist, distConsumed, err := packDeltaReadOfsDistance(pf.data[dataStart:]) - if err != nil { - return ObjectTypeInvalid, 0, err - } - if ofs <= dist { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - dataStart += uint64(distConsumed) - if dataStart > uint64(len(pf.data)) { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - ofs -= dist - case ObjectTypeInvalid, ObjectTypeFuture: - return ObjectTypeInvalid, 0, ErrInvalidObject - default: - return ObjectTypeInvalid, 0, ErrInvalidObject - } - } -} - -func (repo *Repository) packBodyResolveWithin(pf *packFile, ofs uint64) (ObjectType, bufpool.Buffer, error) { - if pf == nil { - return ObjectTypeInvalid, bufpool.Buffer{}, ErrInvalidObject - } - - type deltaFrame struct { - delta bufpool.Buffer - } - var frames 
[]deltaFrame - defer func() { - for i := range frames { - frames[i].delta.Release() - } - }() - - var ( - body bufpool.Buffer - bodyReady bool - resultTy ObjectType - ) - fail := func(err error) (ObjectType, bufpool.Buffer, error) { - if bodyReady { - body.Release() - bodyReady = false - } - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - - resolved := false - for !resolved { - if ofs >= uint64(len(pf.data)) { - return fail(ErrInvalidObject) - } - ty, size, consumed, err := packHeaderParse(pf.data[ofs:]) - if err != nil { - return fail(err) - } - if uint64(consumed) > uint64(len(pf.data))-ofs { - return fail(io.ErrUnexpectedEOF) - } - dataStart := ofs + uint64(consumed) - - switch ty { - case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: - body, err = packSectionInflate(pf, dataStart, size) - if err != nil { - return fail(err) - } - bodyReady = true - resultTy = ty - resolved = true - case ObjectTypeRefDelta: - hashEnd := dataStart + uint64(repo.hashAlgo.Size()) - if hashEnd > uint64(len(pf.data)) { - return fail(io.ErrUnexpectedEOF) - } - var base Hash - copy(base.data[:], pf.data[dataStart:hashEnd]) - base.algo = repo.hashAlgo - delta, err := packSectionInflate(pf, hashEnd, 0) - if err != nil { - return fail(err) - } - frames = append(frames, deltaFrame{delta: delta}) - - loc, err := repo.packIndexFind(base) - if err == nil { - pf, err = repo.packFile(loc.PackPath) - if err != nil { - return fail(err) - } - ofs = loc.Offset - continue - } - if !errors.Is(err, ErrNotFound) { - return fail(err) - } - resultTy, body, err = repo.looseReadTyped(base) - if err != nil { - return fail(err) - } - bodyReady = true - resolved = true - case ObjectTypeOfsDelta: - dist, distConsumed, err := packDeltaReadOfsDistance(pf.data[dataStart:]) - if err != nil { - return fail(err) - } - if ofs <= dist { - return fail(ErrInvalidObject) - } - deltaStart := dataStart + uint64(distConsumed) - if deltaStart > uint64(len(pf.data)) { - return fail(ErrInvalidObject) - 
} - delta, err := packSectionInflate(pf, deltaStart, 0) - if err != nil { - return fail(err) - } - frames = append(frames, deltaFrame{delta: delta}) - ofs -= dist - case ObjectTypeInvalid, ObjectTypeFuture: - return fail(ErrInvalidObject) - default: - return fail(ErrInvalidObject) - } - } - - for i := len(frames) - 1; i >= 0; i-- { - out, err := packDeltaApply(body, frames[i].delta) - body.Release() - bodyReady = false - frames[i].delta.Release() - if err != nil { - return fail(err) - } - body = out - bodyReady = true - } - frames = nil - return resultTy, body, nil -} - -func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) { - pos := 0 - baseBytes := base.Bytes() - deltaBytes := delta.Bytes() - srcSize, err := packVarintRead(deltaBytes, &pos) - if err != nil { - return bufpool.Buffer{}, err - } - dstSize, err := packVarintRead(deltaBytes, &pos) - if err != nil { - return bufpool.Buffer{}, err - } - if srcSize != len(baseBytes) { - return bufpool.Buffer{}, ErrInvalidObject - } - out := bufpool.Borrow(dstSize) - out.Resize(dstSize) - outBytes := out.Bytes() - outPos := 0 - - for pos < len(deltaBytes) { - op := deltaBytes[pos] - pos++ - switch { - case op&0x80 != 0: - off := 0 - n := 0 - if op&0x01 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) - pos++ - } - if op&0x02 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) << 8 - pos++ - } - if op&0x04 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) << 16 - pos++ - } - if op&0x08 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) << 24 - pos++ - } - if op&0x10 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - n |= 
int(deltaBytes[pos]) - pos++ - } - if op&0x20 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - n |= int(deltaBytes[pos]) << 8 - pos++ - } - if op&0x40 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - n |= int(deltaBytes[pos]) << 16 - pos++ - } - if n == 0 { - n = 0x10000 - } - if off+n > len(baseBytes) || outPos+n > len(outBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - copy(outBytes[outPos:], baseBytes[off:off+n]) - outPos += n - case op != 0: - n := int(op) - if pos+n > len(deltaBytes) || outPos+n > len(outBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - copy(outBytes[outPos:], deltaBytes[pos:pos+n]) - pos += n - outPos += n - default: - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - } - - if outPos != len(outBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - return out, nil -} - -func packVarintRead(buf []byte, pos *int) (int, error) { - res := 0 - shift := 0 - for { - if *pos >= len(buf) { - return 0, ErrInvalidObject - } - b := buf[*pos] - *pos++ - res |= int(b&0x7f) << shift - if (b & 0x80) == 0 { - break - } - shift += 7 - } - return res, nil -} - -type packFile struct { - relPath string - size int64 - data []byte - closeMu sync.Once -} - -func openPackFile(absPath, rel string) (*packFile, error) { - f, err := os.Open(absPath) - if err != nil { - return nil, err - } - - stat, err := f.Stat() - if err != nil { - _ = f.Close() - return nil, err - } - if stat.Size() < 12 { - _ = f.Close() - return nil, ErrInvalidObject - } - - var headerArr [12]byte - header := headerArr[:] - _, err = io.ReadFull(f, header) - if err != nil { - _ = f.Close() - return nil, err - } - magic := binary.BigEndian.Uint32(header[:4]) - ver := binary.BigEndian.Uint32(header[4:8]) - if magic != packMagic || ver != packVersion2 { - _ = f.Close() - return nil, ErrInvalidObject - } - - region, 
err := syscall.Mmap( - int(f.Fd()), - 0, - int(stat.Size()), - syscall.PROT_READ, - syscall.MAP_PRIVATE, - ) - if err != nil { - _ = f.Close() - return nil, err - } - err = f.Close() - if err != nil { - _ = syscall.Munmap(region) - return nil, err - } - - return &packFile{ - relPath: rel, - size: stat.Size(), - data: region, - }, nil -} - -func (pf *packFile) Close() error { - if pf == nil { - return nil - } - var closeErr error - pf.closeMu.Do(func() { - if len(pf.data) > 0 { - if err := syscall.Munmap(pf.data); closeErr == nil { - closeErr = err - } - pf.data = nil - } - }) - return closeErr -} - -func (repo *Repository) packFile(rel string) (*packFile, error) { - repo.packFilesMu.RLock() - pf, ok := repo.packFiles[rel] - repo.packFilesMu.RUnlock() - if ok { - return pf, nil - } - - pf, err := openPackFile(repo.repoPath(rel), rel) - if err != nil { - return nil, err - } - - repo.packFilesMu.Lock() - if existing, ok := repo.packFiles[rel]; ok { - repo.packFilesMu.Unlock() - _ = pf.Close() - return existing, nil - } - repo.packFiles[rel] = pf - repo.packFilesMu.Unlock() - return pf, nil -} diff --git a/pack_read_test.go b/pack_read_test.go deleted file mode 100644 index 184a4e5c..00000000 --- a/pack_read_test.go +++ /dev/null @@ -1,149 +0,0 @@ -package furgit - -import ( - "bytes" - "fmt" - "os" - "path/filepath" - "strings" - "testing" -) - -func TestPackfileRead(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitCmd(t, repoPath, "config", "gc.auto", "0") - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "file1.txt"), []byte("content1"), 0o644) - if err != nil { - t.Fatalf("failed to write file1.txt: %v", err) - } - err = os.WriteFile(filepath.Join(workDir, "file2.txt"), []byte("content2"), 0o644) - if err != nil { - t.Fatalf("failed to write file2.txt: %v", err) - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, 
"commit", "-m", "Test commit") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - gitCmd(t, repoPath, "repack", "-a", "-d") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hashObj, _ := repo.ParseHash(commitHash) - obj, err := repo.ReadObject(hashObj) - if err != nil { - t.Fatalf("ReadObject from pack failed: %v", err) - } - - commit, ok := obj.(*StoredCommit) - if !ok { - t.Fatalf("expected *StoredCommit, got %T", obj) - } - - treeObj, err := repo.ReadObject(commit.Tree) - if err != nil { - t.Fatalf("ReadObject tree failed: %v", err) - } - - tree, ok := treeObj.(*StoredTree) - if !ok { - t.Fatalf("expected *StoredTree, got %T", treeObj) - } - - if len(tree.Entries) != 2 { - t.Errorf("tree entries: got %d, want 2", len(tree.Entries)) - } - - gitLsTree := gitCmd(t, repoPath, "ls-tree", commit.Tree.String()) - for _, entry := range tree.Entries { - if !strings.Contains(gitLsTree, string(entry.Name)) { - t.Errorf("git ls-tree doesn't contain %s", entry.Name) - } - } -} - -func TestPackfileLarge(t *testing.T) { - if testing.Short() { - t.Skip("skipping large packfile test in short mode") - } - - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitCmd(t, repoPath, "config", "gc.auto", "0") - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - numFiles := 1000 - for i := 0; i < numFiles; i++ { - filename := filepath.Join(workDir, fmt.Sprintf("file%04d.txt", i)) - content := fmt.Sprintf("Content for file %d\n", i) - err := os.WriteFile(filename, []byte(content), 0o644) - if err != nil { - t.Fatalf("failed to write %s: %v", filename, err) - } - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Large commit") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - gitCmd(t, repoPath, "repack", "-a", "-d") - - repo, err := OpenRepository(repoPath) - if err != 
nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hashObj, _ := repo.ParseHash(commitHash) - obj, _ := repo.ReadObject(hashObj) - commit := obj.(*StoredCommit) - - treeObj, _ := repo.ReadObject(commit.Tree) - tree := treeObj.(*StoredTree) - - if len(tree.Entries) != numFiles { - t.Errorf("tree entries: got %d, want %d", len(tree.Entries), numFiles) - } - - gitCount := gitCmd(t, repoPath, "ls-tree", commit.Tree.String()) - gitLines := strings.Count(gitCount, "\n") + 1 - if len(tree.Entries) != gitLines { - t.Errorf("furgit found %d entries, git found %d", len(tree.Entries), gitLines) - } - - for i := 0; i < 10; i++ { - idx := i * (numFiles / 10) - expectedName := fmt.Sprintf("file%04d.txt", idx) - entry := tree.Entry([]byte(expectedName)) - if entry == nil { - t.Errorf("expected to find entry %s", expectedName) - continue - } - - blobObj, _ := repo.ReadObject(entry.ID) - blob := blobObj.(*StoredBlob) - - expectedContent := fmt.Sprintf("Content for file %d\n", idx) - if string(blob.Data) != expectedContent { - t.Errorf("blob %s: got %q, want %q", expectedName, blob.Data, expectedContent) - } - - gitData := gitCatFile(t, repoPath, "blob", entry.ID.String()) - if !bytes.Equal(blob.Data, gitData) { - t.Errorf("blob %s: furgit data doesn't match git data", expectedName) - } - } -} diff --git a/pktline/pktline.go b/pktline/pktline.go deleted file mode 100644 index b8e4ccee..00000000 --- a/pktline/pktline.go +++ /dev/null @@ -1,164 +0,0 @@ -// Package pktline provides support for the pkt-line format described in gitprotocol-common(5). 
-package pktline - -import ( - "errors" - "io" -) - -const ( - maxPacketSize = 65520 - maxPacketDataLen = maxPacketSize - 4 -) - -var ( - ErrInvalidHeader = errors.New("pktline: invalid header") - ErrPacketTooLarge = errors.New("pktline: packet too large") - ErrBufferTooSmall = errors.New("pktline: buffer too small") -) - -type Status uint8 - -const ( - StatusEOF Status = iota - StatusData - StatusFlush - StatusDelim - StatusResponseEnd -) - -// ReadLine reads a single pkt-line from r into buf. -// It returns the payload slice, number of payload bytes, and a status. -func ReadLine(r io.Reader, buf []byte) ([]byte, int, Status, error) { - if r == nil { - return nil, 0, StatusEOF, ErrInvalidHeader - } - var header [4]byte - if _, err := io.ReadFull(r, header[:]); err != nil { - if errors.Is(err, io.EOF) { - return nil, 0, StatusEOF, io.EOF - } - if errors.Is(err, io.ErrUnexpectedEOF) { - return nil, 0, StatusEOF, io.ErrUnexpectedEOF - } - return nil, 0, StatusEOF, err - } - - n, err := parseHeader(header[:]) - if err != nil { - return nil, 0, StatusEOF, err - } - switch n { - case 0: - return nil, 0, StatusFlush, nil - case 1: - return nil, 0, StatusDelim, nil - case 2: - return nil, 0, StatusResponseEnd, nil - } - if n < 4 { - return nil, 0, StatusEOF, ErrInvalidHeader - } - n -= 4 - if n > maxPacketDataLen { - return nil, 0, StatusEOF, ErrPacketTooLarge - } - if n > len(buf) { - return nil, 0, StatusEOF, ErrBufferTooSmall - } - if _, err := io.ReadFull(r, buf[:n]); err != nil { - if errors.Is(err, io.ErrUnexpectedEOF) { - return nil, 0, StatusEOF, io.ErrUnexpectedEOF - } - return nil, 0, StatusEOF, err - } - return buf[:n], n, StatusData, nil -} - -// WriteLine writes a single pkt-line with data as its payload. 
-func WriteLine(w io.Writer, data []byte) error { - if w == nil { - return ErrInvalidHeader - } - if len(data) > maxPacketDataLen { - return ErrPacketTooLarge - } - var header [4]byte - setHeader(header[:], len(data)+4) - if _, err := w.Write(header[:]); err != nil { - return err - } - if len(data) == 0 { - return nil - } - _, err := w.Write(data) - return err -} - -// Flush writes a flush-pkt ("0000"). -func Flush(w io.Writer) error { - return writeLiteral(w, "0000") -} - -// Delim writes a delim-pkt ("0001"). -func Delim(w io.Writer) error { - return writeLiteral(w, "0001") -} - -// ResponseEnd writes a response-end pkt ("0002"). -func ResponseEnd(w io.Writer) error { - return writeLiteral(w, "0002") -} - -func writeLiteral(w io.Writer, s string) error { - if w == nil { - return ErrInvalidHeader - } - _, err := io.WriteString(w, s) - return err -} - -func parseHeader(b []byte) (int, error) { - if len(b) < 4 { - return 0, ErrInvalidHeader - } - v0, ok := hexVal(b[0]) - if !ok { - return 0, ErrInvalidHeader - } - v1, ok := hexVal(b[1]) - if !ok { - return 0, ErrInvalidHeader - } - v2, ok := hexVal(b[2]) - if !ok { - return 0, ErrInvalidHeader - } - v3, ok := hexVal(b[3]) - if !ok { - return 0, ErrInvalidHeader - } - return (v0 << 12) | (v1 << 8) | (v2 << 4) | v3, nil -} - -func setHeader(buf []byte, size int) { - const hex = "0123456789abcdef" - buf[0] = hex[(size>>12)&0x0f] - buf[1] = hex[(size>>8)&0x0f] - buf[2] = hex[(size>>4)&0x0f] - buf[3] = hex[size&0x0f] -} - -// IIRC strconv.ParseUint, encoding/hex.Decode, etc., allocate memory. 
-func hexVal(b byte) (int, bool) { - switch { - case b >= '0' && b <= '9': - return int(b - '0'), true - case b >= 'a' && b <= 'f': - return int(b-'a') + 10, true - case b >= 'A' && b <= 'F': - return int(b-'A') + 10, true - default: - return 0, false - } -} diff --git a/pktline/pktline_test.go b/pktline/pktline_test.go deleted file mode 100644 index 4dae708b..00000000 --- a/pktline/pktline_test.go +++ /dev/null @@ -1,88 +0,0 @@ -package pktline - -import ( - "bytes" - "errors" - "io" - "testing" -) - -func TestWriteReadLineRoundtrip(t *testing.T) { - var buf bytes.Buffer - payload := []byte("hello\n") - if err := WriteLine(&buf, payload); err != nil { - t.Fatalf("WriteLine: %v", err) - } - - dst := make([]byte, 64) - line, n, status, err := ReadLine(&buf, dst) - if err != nil { - t.Fatalf("ReadLine: %v", err) - } - if status != StatusData { - t.Fatalf("status: got %v, want %v", status, StatusData) - } - if n != len(payload) { - t.Fatalf("n: got %d, want %d", n, len(payload)) - } - if !bytes.Equal(line, payload) { - t.Fatalf("payload: got %q, want %q", line, payload) - } -} - -func TestReadLineSpecialPackets(t *testing.T) { - tests := []struct { - name string - input string - status Status - }{ - {"flush", "0000", StatusFlush}, - {"delim", "0001", StatusDelim}, - {"response_end", "0002", StatusResponseEnd}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - r := bytes.NewBufferString(tt.input) - dst := make([]byte, 16) - line, n, status, err := ReadLine(r, dst) - if err != nil { - t.Fatalf("ReadLine: %v", err) - } - if status != tt.status { - t.Fatalf("status: got %v, want %v", status, tt.status) - } - if n != 0 || len(line) != 0 { - t.Fatalf("expected empty payload, got %d bytes", n) - } - }) - } -} - -func TestReadLineInvalidHeader(t *testing.T) { - r := bytes.NewBufferString("zzzz") - dst := make([]byte, 16) - _, _, _, err := ReadLine(r, dst) - if !errors.Is(err, ErrInvalidHeader) { - t.Fatalf("expected ErrInvalidHeader, got %v", err) - } 
-} - -func TestReadLineBufferTooSmall(t *testing.T) { - var buf bytes.Buffer - payload := []byte("abcd") - if err := WriteLine(&buf, payload); err != nil { - t.Fatalf("WriteLine: %v", err) - } - dst := make([]byte, 2) - _, _, _, err := ReadLine(&buf, dst) - if !errors.Is(err, ErrBufferTooSmall) { - t.Fatalf("expected ErrBufferTooSmall, got %v", err) - } -} - -func TestWriteLineTooLarge(t *testing.T) { - payload := make([]byte, maxPacketDataLen+1) - if err := WriteLine(io.Discard, payload); !errors.Is(err, ErrPacketTooLarge) { - t.Fatalf("expected ErrPacketTooLarge, got %v", err) - } -} diff --git a/refs.go b/refs.go deleted file mode 100644 index 6efdf5ec..00000000 --- a/refs.go +++ /dev/null @@ -1,471 +0,0 @@ -package furgit - -import ( - "bufio" - "bytes" - "fmt" - "os" - "path" - "path/filepath" - "slices" - "strings" -) - -func (repo *Repository) resolveLooseRef(refname string) (Ref, error) { - data, err := os.ReadFile(repo.repoPath(refname)) - if err != nil { - if os.IsNotExist(err) { - return Ref{}, ErrNotFound - } - return Ref{}, err - } - line := strings.TrimSpace(string(data)) - - if strings.HasPrefix(line, "ref: ") { - target := strings.TrimSpace(line[5:]) - if target == "" { - return Ref{Name: refname, Kind: RefKindInvalid}, ErrInvalidRef - } - return Ref{ - Name: refname, - Kind: RefKindSymbolic, - Ref: target, - }, nil - } - - id, err := repo.ParseHash(line) - if err != nil { - return Ref{Name: refname, Kind: RefKindInvalid}, err - } - return Ref{ - Name: refname, - Kind: RefKindDetached, - Hash: id, - }, nil -} - -func (repo *Repository) resolvePackedRef(refname string) (Ref, error) { - // According to git-pack-refs(1), symbolic refs are never - // stored in packed-refs, so we only need to look for detached - // refs here. 
- - path := repo.repoPath("packed-refs") - f, err := os.Open(path) - if err != nil { - if os.IsNotExist(err) { - return Ref{}, ErrNotFound - } - return Ref{}, err - } - defer func() { _ = f.Close() }() - - want := []byte(refname) - scanner := bufio.NewScanner(f) - - for scanner.Scan() { - line := scanner.Bytes() - - if len(line) == 0 || line[0] == '#' || line[0] == '^' { - continue - } - - sp := bytes.IndexByte(line, ' ') - if sp != repo.hashAlgo.Size()*2 { - continue - } - - name := line[sp+1:] - - if !bytes.Equal(name, want) { - continue - } - - hex := string(line[:sp]) - id, err := repo.ParseHash(hex) - if err != nil { - return Ref{Name: refname, Kind: RefKindInvalid}, err - } - - ref := Ref{ - Name: refname, - Kind: RefKindDetached, - Hash: id, - } - - if scanner.Scan() { - next := scanner.Bytes() - if len(next) > 0 && next[0] == '^' { - peeledHex := strings.TrimPrefix(string(next), "^") - peeledHex = strings.TrimSpace(peeledHex) - - peeledID, err := repo.ParseHash(peeledHex) - if err != nil { - return Ref{Name: refname, Kind: RefKindInvalid}, err - } - ref.Peeled = peeledID - } - } - - if scanErr := scanner.Err(); scanErr != nil { - return Ref{Name: refname, Kind: RefKindInvalid}, scanErr - } - - return ref, nil - } - - if scanErr := scanner.Err(); scanErr != nil { - return Ref{Name: refname, Kind: RefKindInvalid}, scanErr - } - return Ref{}, ErrNotFound -} - -// RefKind represents the kind of HEAD reference. -type RefKind int - -const ( - // The HEAD reference is invalid. - RefKindInvalid RefKind = iota - // The HEAD reference points to a detached commit hash. - RefKindDetached - // The HEAD reference points to a symbolic ref. - RefKindSymbolic -) - -// Ref represents a reference. -type Ref struct { - // Name is the fully qualified ref name (e.g., refs/heads/main). - // It may be empty for detached hashes that were not looked up - // by name (e.g., ResolveRef on a raw hash). - Name string - // Kind is the kind of the reference. 
- Kind RefKind - // When Kind is RefKindSymbolic, Ref is the fully qualified ref name. - // Otherwise the value is undefined. - Ref string - // When Kind is RefKindDetached, Hash is the commit hash. - // Otherwise the value is undefined. - Hash Hash - // When Kind is RefKindDetached, and the ref supposedly points to an - // annotated tag, Peeled is the peeled hash, i.e., the hash of the - // object that the tag points to. - Peeled Hash -} - -type refParseRule struct { - fmtStr string - prefix string - suffix string -} - -func parseRule(rule string) refParseRule { - prefix, suffix, _ := strings.Cut(rule, "%s") - return refParseRule{ - fmtStr: rule, - prefix: prefix, - suffix: suffix, - } -} - -var refRevParseRules = []refParseRule{ - parseRule("%s"), - parseRule("refs/%s"), - parseRule("refs/tags/%s"), - parseRule("refs/heads/%s"), - parseRule("refs/remotes/%s"), - parseRule("refs/remotes/%s/HEAD"), -} - -func (rule refParseRule) match(name string) (string, bool) { - if rule.suffix != "" { - if !strings.HasSuffix(name, rule.suffix) { - return "", false - } - name = strings.TrimSuffix(name, rule.suffix) - } - - var short string - n, err := fmt.Sscanf(name, rule.prefix+"%s", &short) - if err != nil || n != 1 { - return "", false - } - if fmt.Sprintf(rule.prefix+"%s", short) != name { - return "", false - } - return short, true -} - -func (rule refParseRule) render(short string) string { - return rule.prefix + short + rule.suffix -} - -// Short returns the shortest unambiguous shorthand for the ref name, -// following the rev-parse rules used by Git. The provided list of refs -// is used to test for ambiguity. -// -// When strict is true, all other rules must fail to resolve to an -// existing ref; otherwise only rules prior to the matched rule must -// fail. 
-func (ref *Ref) Short(all []Ref, strict bool) string { - if ref == nil { - return "" - } - name := ref.Name - if name == "" { - return "" - } - - names := make(map[string]struct{}, len(all)) - for _, r := range all { - if r.Name == "" { - continue - } - names[r.Name] = struct{}{} - } - - for i := len(refRevParseRules) - 1; i > 0; i-- { - short, ok := refRevParseRules[i].match(name) - if !ok { - continue - } - - rulesToFail := i - if strict { - rulesToFail = len(refRevParseRules) - } - - ambiguous := false - for j := 0; j < rulesToFail; j++ { - if j == i { - continue - } - full := refRevParseRules[j].render(short) - if _, found := names[full]; found { - ambiguous = true - break - } - } - - if !ambiguous { - return short - } - } - - return name -} - -// ResolveRef reads the given fully qualified ref (such as "HEAD" or "refs/heads/main") -// and interprets its contents as either a symbolic ref ("ref: refs/..."), a detached -// hash, or invalid. -// If path is empty, it defaults to "HEAD". -// (While typically only HEAD may be a symbolic reference, others may be as well.) -func (repo *Repository) ResolveRef(path string) (Ref, error) { - if path == "" { - path = "HEAD" - } - - if !strings.HasPrefix(path, "refs/") && !slices.Contains([]string{ - "HEAD", "ORIG_HEAD", "FETCH_HEAD", "MERGE_HEAD", - "CHERRY_PICK_HEAD", "REVERT_HEAD", "REBASE_HEAD", "BISECT_HEAD", - }, path) { - id, err := repo.ParseHash(path) - if err == nil { - return Ref{ - Name: path, - Kind: RefKindDetached, - Hash: id, - }, nil - } - - // For now let's keep this to prevent e.g., random users from - // specifying something crazy like objects/... or ./config. - // There may be other legal pseudo-refs in the future, - // but it's probably the best to stay cautious for now. 
- return Ref{Name: path, Kind: RefKindInvalid}, ErrInvalidRef - } - - loose, err := repo.resolveLooseRef(path) - if err == nil { - return loose, nil - } - if err != ErrNotFound { - return Ref{Name: path, Kind: RefKindInvalid}, err - } - - packed, err := repo.resolvePackedRef(path) - if err == nil { - return packed, nil - } - if err != ErrNotFound { - return Ref{Name: path, Kind: RefKindInvalid}, err - } - - return Ref{Name: path, Kind: RefKindInvalid}, ErrNotFound -} - -// ResolveRefFully resolves a ref by recursively following -// symbolic references until it reaches a detached ref. -// Symbolic cycles are detected and reported. -// Annotated tags are not peeled. -func (repo *Repository) ResolveRefFully(path string) (Ref, error) { - seen := make(map[string]struct{}) - return repo.resolveRefFully(path, seen) -} - -func (repo *Repository) resolveRefFully(path string, seen map[string]struct{}) (Ref, error) { - if _, found := seen[path]; found { - return Ref{}, fmt.Errorf("symbolic ref cycle involving %q", path) - } - seen[path] = struct{}{} - - ref, err := repo.ResolveRef(path) - if err != nil { - return Ref{}, err - } - - switch ref.Kind { - case RefKindDetached: - return ref, nil - - case RefKindSymbolic: - if ref.Ref == "" { - return Ref{}, ErrInvalidRef - } - return repo.resolveRefFully(ref.Ref, seen) - - default: - return Ref{}, ErrInvalidRef - } -} - -// ListRefs lists refs similarly to git-show-ref. -// -// The pattern must be empty or begin with "refs/". An empty pattern is -// treated as "refs/*". -// -// Loose refs are resolved using filesystem globbing relative to the -// repository root, then packed refs are read while skipping any names -// that already appeared as loose refs. Packed refs are filtered -// similarly. 
-func (repo *Repository) ListRefs(pattern string) ([]Ref, error) { - if pattern == "" { - pattern = "refs/*" - } - if !strings.HasPrefix(pattern, "refs/") { - return nil, ErrInvalidRef - } - if filepath.IsAbs(pattern) { - return nil, ErrInvalidRef - } - - var out []Ref - seen := make(map[string]struct{}) - - globPattern := filepath.Join(repo.rootPath, filepath.FromSlash(pattern)) - matches, err := filepath.Glob(globPattern) - if err != nil { - return nil, err - } - for _, match := range matches { - info, statErr := os.Stat(match) - if statErr != nil { - return nil, statErr - } - if info.IsDir() { - continue - } - - rel, relErr := filepath.Rel(repo.rootPath, match) - if relErr != nil { - return nil, relErr - } - name := filepath.ToSlash(rel) - if !strings.HasPrefix(name, "refs/") { - continue - } - - ref, resolveErr := repo.resolveLooseRef(name) - if resolveErr != nil { - if resolveErr == ErrNotFound || os.IsNotExist(resolveErr) { - continue - } - return nil, resolveErr - } - - seen[name] = struct{}{} - out = append(out, ref) - } - - packedPath := repo.repoPath("packed-refs") - f, err := os.Open(packedPath) - if err != nil { - if os.IsNotExist(err) { - return out, nil - } - return nil, err - } - defer func() { _ = f.Close() }() - - scanner := bufio.NewScanner(f) - lastIdx := -1 - for scanner.Scan() { - line := scanner.Bytes() - if len(line) == 0 || line[0] == '#' { - continue - } - - if line[0] == '^' { - if lastIdx < 0 { - continue - } - peeledHex := strings.TrimPrefix(string(line), "^") - peeledHex = strings.TrimSpace(peeledHex) - peeled, parseErr := repo.ParseHash(peeledHex) - if parseErr != nil { - return nil, parseErr - } - out[lastIdx].Peeled = peeled - continue - } - - sp := bytes.IndexByte(line, ' ') - if sp != repo.hashAlgo.Size()*2 { - lastIdx = -1 - continue - } - - name := string(line[sp+1:]) - if !strings.HasPrefix(name, "refs/") { - lastIdx = -1 - continue - } - if _, ok := seen[name]; ok { - lastIdx = -1 - continue - } - - match, matchErr := 
path.Match(pattern, name) - if matchErr != nil { - return nil, matchErr - } - if !match { - lastIdx = -1 - continue - } - - hash, parseErr := repo.ParseHash(string(line[:sp])) - if parseErr != nil { - return nil, parseErr - } - out = append(out, Ref{ - Name: name, - Kind: RefKindDetached, - Hash: hash, - }) - lastIdx = len(out) - 1 - } - if scanErr := scanner.Err(); scanErr != nil { - return nil, scanErr - } - - return out, nil -} diff --git a/refs_test.go b/refs_test.go deleted file mode 100644 index 2d4a1532..00000000 --- a/refs_test.go +++ /dev/null @@ -1,520 +0,0 @@ -package furgit - -import ( - "os" - "path/filepath" - "strings" - "testing" -) - -func TestResolveRef(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "test.txt"), []byte("content"), 0o644) - if err != nil { - t.Fatalf("Failed to write test.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "test") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - gitCmd(t, repoPath, "update-ref", "refs/heads/main", commitHash) - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hashObj, _ := repo.ParseHash(commitHash) - resolved, err := repo.ResolveRef("refs/heads/main") - if err != nil { - t.Fatalf("ResolveRef failed: %v", err) - } - - if resolved.Kind != RefKindDetached { - t.Fatalf("expected detached ref, got %v", resolved.Kind) - } - if resolved.Hash != hashObj { - t.Errorf("resolved hash: got %s, want %s", resolved.Hash, hashObj) - } - - gitRevParse := gitCmd(t, repoPath, "rev-parse", "refs/heads/main") - if resolved.Hash.String() != gitRevParse { - t.Errorf("furgit resolved %s, git resolved %s", resolved.Hash, gitRevParse) - } - - _, err = repo.ResolveRef("refs/heads/nonexistent") 
- if err == nil { - t.Error("expected error for nonexistent ref") - } -} - -func TestResolveHEAD(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "test.txt"), []byte("content"), 0o644) - if err != nil { - t.Fatalf("failed to write test.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "test") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - gitCmd(t, repoPath, "update-ref", "refs/heads/main", commitHash) - gitCmd(t, repoPath, "symbolic-ref", "HEAD", "refs/heads/main") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - ref, err := repo.ResolveRef("HEAD") - if err != nil { - t.Fatalf("ResolveRef(HEAD) failed: %v", err) - } - - if ref.Kind != RefKindSymbolic { - t.Fatalf("HEAD kind: got %v, want %v", ref.Kind, RefKindSymbolic) - } - - if ref.Ref != "refs/heads/main" { - t.Errorf("HEAD symbolic ref: got %q, want %q", ref.Ref, "refs/heads/main") - } - - gitSymRef := gitCmd(t, repoPath, "symbolic-ref", "HEAD") - if ref.Ref != gitSymRef { - t.Errorf("furgit resolved %v, git resolved %s", ref.Ref, gitSymRef) - } -} - -func TestPackedRefs(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "test.txt"), []byte("content1"), 0o644) - if err != nil { - t.Fatalf("failed to write test.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "commit1") - commit1Hash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - err = os.WriteFile(filepath.Join(workDir, "test2.txt"), []byte("content2"), 0o644) - if err != nil { - t.Fatalf("failed to 
write test2.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "commit2") - commit2Hash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - gitCmd(t, repoPath, "update-ref", "refs/heads/branch1", commit1Hash) - gitCmd(t, repoPath, "update-ref", "refs/heads/branch2", commit2Hash) - gitCmd(t, repoPath, "update-ref", "refs/tags/v1.0", commit1Hash) - - gitCmd(t, repoPath, "pack-refs", "--all") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hash1, _ := repo.ParseHash(commit1Hash) - hash2, _ := repo.ParseHash(commit2Hash) - - resolved1, err := repo.ResolveRef("refs/heads/branch1") - if err != nil { - t.Fatalf("ResolveRef branch1 failed: %v", err) - } - if resolved1.Kind != RefKindDetached || resolved1.Hash != hash1 { - t.Errorf("branch1: got %s, want %s", resolved1.Hash, hash1) - } - - gitResolved1 := gitCmd(t, repoPath, "rev-parse", "refs/heads/branch1") - if resolved1.Hash.String() != gitResolved1 { - t.Errorf("furgit resolved %s, git resolved %s", resolved1.Hash, gitResolved1) - } - - resolved2, err := repo.ResolveRef("refs/heads/branch2") - if err != nil { - t.Fatalf("ResolveRef branch2 failed: %v", err) - } - if resolved2.Kind != RefKindDetached || resolved2.Hash != hash2 { - t.Errorf("branch2: got %s, want %s", resolved2.Hash, hash2) - } - - resolvedTag, err := repo.ResolveRef("refs/tags/v1.0") - if err != nil { - t.Fatalf("ResolveRef tag failed: %v", err) - } - if resolvedTag.Kind != RefKindDetached || resolvedTag.Hash != hash1 { - t.Errorf("tag: got %s, want %s", resolvedTag.Hash, hash1) - } -} - -func TestResolveRefFully(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - // Create an initial commit - err := os.WriteFile(filepath.Join(workDir, "file.txt"), []byte("content"), 0o644) - if 
err != nil { - t.Fatalf("failed to write file.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "init") - commit := gitCmd(t, repoPath, "rev-parse", "HEAD") - - // Create two layers of symbolic refs - gitCmd(t, repoPath, "symbolic-ref", "refs/heads/level1", "refs/heads/level2") - gitCmd(t, repoPath, "symbolic-ref", "refs/heads/level2", "refs/heads/main") - gitCmd(t, repoPath, "update-ref", "refs/heads/main", commit) - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - commitHash, err := repo.ParseHash(commit) - if err != nil { - t.Fatalf("ParseHash failed: %v", err) - } - - resolved, err := repo.ResolveRefFully("refs/heads/level1") - if err != nil { - t.Fatalf("ResolveRefFully failed: %v", err) - } - - if resolved.Hash != commitHash { - t.Errorf("ResolveRefFully: got hash %s, want %s", resolved.Hash, commitHash) - } -} - -func TestResolveRefFullySymbolicCycle(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - gitCmd(t, repoPath, "symbolic-ref", "refs/heads/A", "refs/heads/B") - gitCmd(t, repoPath, "symbolic-ref", "refs/heads/B", "refs/heads/A") - - _, err = repo.ResolveRefFully("refs/heads/A") - if err == nil { - t.Fatalf("ResolveRefFully should fail on a symbolic cycle") - } - - if !strings.Contains(err.Error(), "cycle") { - t.Fatalf("unexpected error for symbolic cycle: %v", err) - } -} - -func TestResolveRefHashInput(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "file.txt"), []byte("content"), 0o644) - if err != nil { - t.Fatalf("failed to write file.txt: %v", err) - } 
- gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "init") - - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hashObj, err := repo.ParseHash(commitHash) - if err != nil { - t.Fatalf("ParseHash failed: %v", err) - } - - ref, err := repo.ResolveRef(commitHash) - if err != nil { - t.Fatalf("ResolveRef(hash) failed: %v", err) - } - if ref.Kind != RefKindDetached { - t.Fatalf("expected RefKindDetached, got %v", ref.Kind) - } - if ref.Hash != hashObj { - t.Fatalf("hash mismatch: got %s, want %s", ref.Hash, hashObj) - } - - hashRef, err := repo.ResolveRefFully(commitHash) - if err != nil { - t.Fatalf("ResolveRefFully(hash) failed: %v", err) - } - if hashRef.Hash != hashObj { - t.Fatalf("hash mismatch: got %s, want %s", hashRef.Hash, hashObj) - } - - _, err = repo.ResolveRef("this_is_not_a_hash") - if err == nil { - t.Fatalf("expected error for invalid hash input") - } -} - -func TestListRefsLooseOverridesPacked(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - gitCmd(t, repoPath, "symbolic-ref", "HEAD", "refs/heads/main") - - err := os.WriteFile(filepath.Join(workDir, "file.txt"), []byte("one"), 0o644) - if err != nil { - t.Fatalf("failed to write file.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "c1") - commit1 := gitCmd(t, repoPath, "rev-parse", "HEAD") - - gitCmd(t, repoPath, "update-ref", "refs/heads/main", commit1) - gitCmd(t, repoPath, "update-ref", "refs/heads/feature", commit1) - gitCmd(t, repoPath, "pack-refs", "--all", "--prune") - - err = os.WriteFile(filepath.Join(workDir, "file.txt"), []byte("two"), 0o644) - if err != nil { - t.Fatalf("failed to write 
file.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "c2") - commit2 := gitCmd(t, repoPath, "rev-parse", "HEAD") - gitCmd(t, repoPath, "update-ref", "refs/heads/main", commit2) - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hash1, _ := repo.ParseHash(commit1) - hash2, _ := repo.ParseHash(commit2) - - refs, err := repo.ListRefs("refs/heads/*") - if err != nil { - t.Fatalf("ListRefs failed: %v", err) - } - - if len(refs) != 2 { - t.Fatalf("expected 2 refs, got %d", len(refs)) - } - - got := make(map[string]Ref, len(refs)) - for _, r := range refs { - if _, exists := got[r.Name]; exists { - t.Fatalf("duplicate ref %q in results", r.Name) - } - got[r.Name] = r - } - - mainRef, ok := got["refs/heads/main"] - if !ok { - t.Fatalf("missing refs/heads/main in results") - } - if mainRef.Kind != RefKindDetached || mainRef.Hash != hash2 { - t.Fatalf("refs/heads/main hash: got %s (kind %v), want %s", mainRef.Hash, mainRef.Kind, hash2) - } - - featureRef, ok := got["refs/heads/feature"] - if !ok { - t.Fatalf("missing refs/heads/feature in results") - } - if featureRef.Kind != RefKindDetached || featureRef.Hash != hash1 { - t.Fatalf("refs/heads/feature hash: got %s (kind %v), want %s", featureRef.Hash, featureRef.Kind, hash1) - } -} - -func TestListRefsPatternFiltering(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - gitCmd(t, repoPath, "symbolic-ref", "HEAD", "refs/heads/main") - - err := os.WriteFile(filepath.Join(workDir, "file.txt"), []byte("one"), 0o644) - if err != nil { - t.Fatalf("failed to write file.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "c1") - commit1 := gitCmd(t, repoPath, 
"rev-parse", "HEAD") - - gitCmd(t, repoPath, "update-ref", "refs/heads/main", commit1) - gitCmd(t, repoPath, "update-ref", "refs/heads/feature", commit1) - gitCmd(t, repoPath, "pack-refs", "--all", "--prune") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hash1, _ := repo.ParseHash(commit1) - - refs, err := repo.ListRefs("refs/heads/fea*") - if err != nil { - t.Fatalf("ListRefs failed: %v", err) - } - if len(refs) != 1 { - t.Fatalf("expected 1 ref, got %d", len(refs)) - } - if refs[0].Name != "refs/heads/feature" { - t.Fatalf("unexpected ref name: got %q, want %q", refs[0].Name, "refs/heads/feature") - } - if refs[0].Kind != RefKindDetached || refs[0].Hash != hash1 { - t.Fatalf("refs/heads/feature hash: got %s (kind %v), want %s", refs[0].Hash, refs[0].Kind, hash1) - } -} - -func TestListRefsPackedPatterns(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - gitCmd(t, repoPath, "symbolic-ref", "HEAD", "refs/heads/main") - - err := os.WriteFile(filepath.Join(workDir, "file.txt"), []byte("one"), 0o644) - if err != nil { - t.Fatalf("failed to write file.txt: %v", err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "c1") - commit := gitCmd(t, repoPath, "rev-parse", "HEAD") - - gitCmd(t, repoPath, "update-ref", "refs/heads/main", commit) - gitCmd(t, repoPath, "update-ref", "refs/heads/feature/one", commit) - gitCmd(t, repoPath, "update-ref", "refs/notes/review", commit) - gitCmd(t, repoPath, "update-ref", "refs/tags/v1", commit) - gitCmd(t, repoPath, "pack-refs", "--all", "--prune") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - tests := []struct { - pattern string - want []string - }{ - { - pattern: 
"refs/heads/*", - want: []string{"refs/heads/main"}, - }, - { - pattern: "refs/heads/*/*", - want: []string{"refs/heads/feature/one"}, - }, - { - pattern: "refs/*/feature/one", - want: []string{"refs/heads/feature/one"}, - }, - { - pattern: "refs/heads/feat?re/one", - want: []string{"refs/heads/feature/one"}, - }, - { - pattern: "refs/tags/v[0-9]", - want: []string{"refs/tags/v1"}, - }, - { - pattern: "refs/*/*", - want: []string{"refs/heads/main", "refs/notes/review", "refs/tags/v1"}, - }, - } - - for _, tt := range tests { - t.Run(tt.pattern, func(t *testing.T) { - refs, err := repo.ListRefs(tt.pattern) - if err != nil { - t.Fatalf("ListRefs(%q) failed: %v", tt.pattern, err) - } - - got := make(map[string]struct{}, len(refs)) - for _, r := range refs { - got[r.Name] = struct{}{} - } - - want := make(map[string]struct{}, len(tt.want)) - for _, w := range tt.want { - want[w] = struct{}{} - } - - if len(got) != len(want) { - t.Fatalf("ListRefs(%q) returned %d refs, want %d", tt.pattern, len(got), len(want)) - } - for name := range got { - if _, ok := want[name]; !ok { - t.Fatalf("ListRefs(%q) unexpected ref %q", tt.pattern, name) - } - } - }) - } -} - -func TestRefShort(t *testing.T) { - t.Run("unambiguous", func(t *testing.T) { - ref := Ref{Name: "refs/heads/main"} - short := ref.Short([]Ref{ref}, false) - if short != "main" { - t.Fatalf("expected short name %q, got %q", "main", short) - } - }) - - t.Run("ambiguous", func(t *testing.T) { - ref := Ref{Name: "refs/heads/main"} - tags := Ref{Name: "refs/tags/main"} - short := ref.Short([]Ref{ref, tags}, false) - if short != "heads/main" { - t.Fatalf("expected ambiguous ref to shorten to %q, got %q", "heads/main", short) - } - }) - - t.Run("strict", func(t *testing.T) { - ref := Ref{Name: "refs/heads/main"} - remoteHead := Ref{Name: "refs/remotes/main/HEAD"} - - shortNonStrict := ref.Short([]Ref{ref, remoteHead}, false) - if shortNonStrict != "main" { - t.Fatalf("expected non-strict short name %q, got %q", "main", 
shortNonStrict) - } - - shortStrict := ref.Short([]Ref{ref, remoteHead}, true) - if shortStrict != "heads/main" { - t.Fatalf("expected strict ambiguity to shorten to %q, got %q", "heads/main", shortStrict) - } - }) -} diff --git a/repo.go b/repo.go deleted file mode 100644 index 4a4ebcc6..00000000 --- a/repo.go +++ /dev/null @@ -1,160 +0,0 @@ -package furgit - -import ( - "encoding/hex" - "fmt" - "os" - "path/filepath" - "sync" - - "codeberg.org/lindenii/furgit/config" -) - -// Repository represents a Git repository. -// -// It is safe to access the same Repository from multiple goroutines -// without additional synchronization. -// -// Objects derived from a Repository must not be used after the Repository -// has been closed. -type Repository struct { - rootPath string - hashAlgo hashAlgorithm - - packIdxOnce sync.Once - packIdx []*packIndex - packIdxErr error - - packFiles map[string]*packFile - packFilesMu sync.RWMutex - closeOnce sync.Once -} - -// OpenRepository opens the repository at the provided path. -// -// The path is expected to be the actual repository directory, i.e., -// the repository itself for bare repositories, or the .git -// subdirectory for non-bare repositories. 
-func OpenRepository(path string) (*Repository, error) { - fi, err := os.Stat(path) - if err != nil { - return nil, err - } - if !fi.IsDir() { - return nil, ErrInvalidObject - } - - cfgPath := filepath.Join(path, "config") - f, err := os.Open(cfgPath) - if err != nil { - return nil, fmt.Errorf("furgit: unable to open config: %w", err) - } - defer func() { - _ = f.Close() - }() - - cfg, err := config.ParseConfig(f) - if err != nil { - return nil, fmt.Errorf("furgit: failed to parse config: %w", err) - } - - algo := cfg.Get("extensions", "", "objectformat") - if algo == "" { - algo = "sha1" - } - - hashAlgo, ok := parseHashAlgorithm(algo) - if !ok { - return nil, fmt.Errorf("furgit: unsupported hash algorithm %q", algo) - } - - return &Repository{ - rootPath: path, - hashAlgo: hashAlgo, - packFiles: make(map[string]*packFile), - }, nil -} - -// Close closes the repository, releasing any resources associated with it. -// -// It is safe to call Close multiple times; subsequent calls will have no -// effect. -// -// Close invalidates any objects derived from the Repository as it; -// using them may cause segmentation faults or other undefined behavior. -func (repo *Repository) Close() error { - var closeErr error - repo.closeOnce.Do(func() { - repo.packFilesMu.Lock() - for key, pf := range repo.packFiles { - err := pf.Close() - if err != nil && closeErr == nil { - closeErr = err - } - delete(repo.packFiles, key) - } - repo.packFilesMu.Unlock() - if len(repo.packIdx) > 0 { - for _, idx := range repo.packIdx { - err := idx.Close() - if err != nil && closeErr == nil { - closeErr = err - } - } - } - }) - return closeErr -} - -// repoPath joins the root with a relative path. -func (repo *Repository) repoPath(rel string) string { - return filepath.Join(repo.rootPath, rel) -} - -// ParseHash converts a hex string into a Hash, validating -// it matches the repository's hash size. 
-func (repo *Repository) ParseHash(s string) (Hash, error) { - var id Hash - if len(s)%2 != 0 { - return id, fmt.Errorf("furgit: invalid hash length %d, it has to be even at the very least", len(s)) - } - expectedLen := repo.hashAlgo.Size() * 2 - if len(s) != expectedLen { - return id, fmt.Errorf("furgit: hash length mismatch: got %d chars, expected %d for hash size %d", len(s), expectedLen, repo.hashAlgo.Size()) - } - data, err := hex.DecodeString(s) - if err != nil { - return id, fmt.Errorf("furgit: decode hash: %w", err) - } - copy(id.data[:], data) - id.algo = repo.hashAlgo - return id, nil -} - -// computeRawHash computes a hash from raw data using the repository's hash algorithm. -func (repo *Repository) computeRawHash(data []byte) Hash { - return repo.hashAlgo.Sum(data) -} - -// verifyRawObject verifies a raw object against its expected hash. -func (repo *Repository) verifyRawObject(buf []byte, want Hash) bool { //nolint:unused - if want.algo != repo.hashAlgo { - return false - } - return repo.computeRawHash(buf) == want -} - -// verifyTypedObject verifies a typed object against its expected hash. 
-func (repo *Repository) verifyTypedObject(ty ObjectType, body []byte, want Hash) bool { //nolint:unused - if want.algo != repo.hashAlgo { - return false - } - header, err := headerForType(ty, body) - if err != nil { - return false - } - raw := make([]byte, len(header)+len(body)) - copy(raw, header) - copy(raw[len(header):], body) - return repo.computeRawHash(raw) == want -} diff --git a/repo_current_test.go b/repo_current_test.go deleted file mode 100644 index e7cab8e6..00000000 --- a/repo_current_test.go +++ /dev/null @@ -1,70 +0,0 @@ -package furgit - -import ( - "os" - "path/filepath" - "testing" -) - -func TestCurrentRepoDepthFirstEnumeration(t *testing.T) { - gitDir := filepath.Join(".git") - if _, err := os.Stat(gitDir); os.IsNotExist(err) { - t.Skip("no .git directory found in current repo") - } - - repo, err := OpenRepository(gitDir) - if err != nil { - t.Fatalf("failed to open current .git directory: %v", err) - } - defer func() { _ = repo.Close() }() - - headHash, err := repo.ResolveRefFully("HEAD") - if err != nil { - t.Fatalf("failed to resolve HEAD: %v", err) - } - - visited := make(map[Hash]bool) - var queue []Hash - queue = append(queue, headHash.Hash) - - objectsRead := 0 - - for len(queue) > 0 { - hash := queue[0] - queue = queue[1:] - - if visited[hash] { - continue - } - visited[hash] = true - - obj, err := repo.ReadObject(hash) - if err != nil { - t.Fatalf("failed to read object %s: %v", hash, err) - } - objectsRead++ - - switch o := obj.(type) { - case *StoredCommit: - queue = append(queue, o.Tree) - queue = append(queue, o.Parents...) 
- - case *StoredTree: - for _, entry := range o.Entries { - queue = append(queue, entry.ID) - } - - case *StoredTag: - queue = append(queue, o.Target) - - case *StoredBlob: - - default: - t.Errorf("unexpected object type: %T", o) - } - } - - if objectsRead == 0 { - t.Fatal("no objects were read from the repository") - } -} diff --git a/repo_test.go b/repo_test.go deleted file mode 100644 index 3e622e37..00000000 --- a/repo_test.go +++ /dev/null @@ -1,49 +0,0 @@ -package furgit - -import ( - "testing" -) - -func TestRepositoryOpen(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - if repo.rootPath != repoPath { - t.Errorf("rootPath: got %q, want %q", repo.rootPath, repoPath) - } - hashSize := repo.hashAlgo.Size() - if hashSize != 32 && hashSize != 20 { - t.Errorf("hashSize: got %d, want 32 (SHA-256) or 20 (SHA-1)", hashSize) - } -} - -func TestRepositoryOpenInvalid(t *testing.T) { - _, err := OpenRepository("/nonexistent/path") - if err == nil { - t.Fatal("expected error for nonexistent path") - } -} - -func TestRepositoryClose(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - - if err := repo.Close(); err != nil { - t.Fatalf("Close failed: %v", err) - } - - if err := repo.Close(); err != nil { - t.Fatalf("second Close failed: %v", err) - } -} diff --git a/testutil_sha1_test.go b/testutil_sha1_test.go deleted file mode 100644 index c8ce87eb..00000000 --- a/testutil_sha1_test.go +++ /dev/null @@ -1,29 +0,0 @@ -//go:build sha1 - -package furgit - -import ( - "os" - "os/exec" - "testing" -) - -func setupTestRepo(t *testing.T) (string, func()) { - t.Helper() - tempDir, err := os.MkdirTemp("", "furgit-test-*.git") - if err != nil { - t.Fatalf("failed to create temp 
dir: %v", err) - } - cleanup := func() { - _ = os.RemoveAll(tempDir) - } - - cmd := exec.Command("git", "init", "--object-format=sha1", "--bare", tempDir) - cmd.Env = append(os.Environ(), "GIT_CONFIG_GLOBAL=/dev/null", "GIT_CONFIG_SYSTEM=/dev/null") - if output, err := cmd.CombinedOutput(); err != nil { - cleanup() - t.Fatalf("failed to init git repo: %v\n%s", err, output) - } - - return tempDir, cleanup -} diff --git a/testutil_sha256_test.go b/testutil_sha256_test.go deleted file mode 100644 index db5cb40d..00000000 --- a/testutil_sha256_test.go +++ /dev/null @@ -1,29 +0,0 @@ -//go:build !sha1 - -package furgit - -import ( - "os" - "os/exec" - "testing" -) - -func setupTestRepo(t *testing.T) (string, func()) { - t.Helper() - tempDir, err := os.MkdirTemp("", "furgit-test-*.git") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - cleanup := func() { - _ = os.RemoveAll(tempDir) - } - - cmd := exec.Command("git", "init", "--object-format=sha256", "--bare", tempDir) - cmd.Env = append(os.Environ(), "GIT_CONFIG_GLOBAL=/dev/null", "GIT_CONFIG_SYSTEM=/dev/null") - if output, err := cmd.CombinedOutput(); err != nil { - cleanup() - t.Fatalf("failed to init git repo: %v\n%s", err, output) - } - - return tempDir, cleanup -} diff --git a/testutil_test.go b/testutil_test.go deleted file mode 100644 index bca6db14..00000000 --- a/testutil_test.go +++ /dev/null @@ -1,67 +0,0 @@ -package furgit - -import ( - "bytes" - "os" - "os/exec" - "strings" - "testing" -) - -func setupWorkDir(t *testing.T) (string, func()) { - t.Helper() - workDir, err := os.MkdirTemp("", "furgit-work-*") - if err != nil { - t.Fatalf("failed to create work dir: %v", err) - } - return workDir, func() { _ = os.RemoveAll(workDir) } -} - -func gitCmd(t *testing.T, dir string, args ...string) string { - t.Helper() - cmd := exec.Command("git", args...) 
- cmd.Dir = dir - cmd.Env = append(os.Environ(), - "GIT_CONFIG_GLOBAL=/dev/null", - "GIT_CONFIG_SYSTEM=/dev/null", - "GIT_AUTHOR_NAME=Test Author", - "GIT_AUTHOR_EMAIL=test@example.org", - "GIT_COMMITTER_NAME=Test Committer", - "GIT_COMMITTER_EMAIL=committer@example.org", - "GIT_AUTHOR_DATE=1234567890 +0000", - "GIT_COMMITTER_DATE=1234567890 +0000", - ) - output, err := cmd.CombinedOutput() - if err != nil { - t.Fatalf("git %v failed: %v\n%s", args, err, output) - } - return strings.TrimSpace(string(output)) -} - -func gitHashObject(t *testing.T, dir, objType string, data []byte) string { - t.Helper() - cmd := exec.Command("git", "hash-object", "-t", objType, "-w", "--stdin") - cmd.Dir = dir - cmd.Stdin = bytes.NewReader(data) - cmd.Env = append(os.Environ(), "GIT_CONFIG_GLOBAL=/dev/null", "GIT_CONFIG_SYSTEM=/dev/null") - output, err := cmd.CombinedOutput() - if err != nil { - t.Fatalf("git hash-object failed: %v\n%s", err, output) - } - return strings.TrimSpace(string(output)) -} - -func gitCatFile(t *testing.T, dir, objType, hash string) []byte { - t.Helper() - cmd := exec.Command("git", "cat-file", objType, hash) - cmd.Dir = dir - cmd.Env = append(os.Environ(), "GIT_CONFIG_GLOBAL=/dev/null", "GIT_CONFIG_SYSTEM=/dev/null") - output, err := cmd.CombinedOutput() - if err != nil { - t.Fatalf("git cat-file %s %s failed: %v\n%s", objType, hash, err, output) - } - if objType == "-t" || objType == "-s" { - return bytes.TrimSpace(output) - } - return output -} |
